You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by ac...@apache.org on 2012/03/06 01:53:18 UTC
svn commit: r1297312 - in
/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project: ./
hadoop-yarn/dev-support/
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/
hadoop-yarn/had...
Author: acmurthy
Date: Tue Mar 6 00:53:18 2012
New Revision: 1297312
URL: http://svn.apache.org/viewvc?rev=1297312&view=rev
Log:
Merge -c 1297310 from trunk to branch-0.23 to fix MAPREDUCE-3034. Ensure NodeManager reboots itself on direction from ResourceManager. Contributed by Devaraj K & Eric Payne.
Modified:
hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml
hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt Tue Mar 6 00:53:18 2012
@@ -142,6 +142,9 @@ Release 0.23.2 - UNRELEASED
MAPREDUCE-3964. ResourceManager does not have JVM metrics (Jason Lowe via
bobby)
+ MAPREDUCE-3034. Ensure NodeManager reboots itself on direction from
+ ResourceManager. (Devaraj K & Eric Payne via acmurthy)
+
Release 0.23.1 - 2012-02-17
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml Tue Mar 6 00:53:18 2012
@@ -198,8 +198,12 @@
<Method name="run" />
<Bug pattern="DM_EXIT" />
</Match>
+ <Match>
+ <Class name="org.apache.hadoop.yarn.server.nodemanager.NodeManager" />
+ <Method name="initAndStartNodeManager" />
+ <Bug pattern="DM_EXIT" />
+ </Match>
-
<!-- Ignore heartbeat exception when killing localizer -->
<Match>
<Class name="org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer" />
Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Tue Mar 6 00:53:18 2012
@@ -60,7 +60,8 @@ public class NodeManager extends Composi
private ApplicationACLsManager aclsManager;
private NodeHealthCheckerService nodeHealthChecker;
private LocalDirsHandlerService dirsHandler;
-
+ private static CompositeServiceShutdownHook nodeManagerShutdownHook;
+
public NodeManager() {
super(NodeManager.class.getName());
}
@@ -226,25 +227,52 @@ public class NodeManager extends Composi
@Override
public void stateChanged(Service service) {
- // Shutdown the Nodemanager when the NodeStatusUpdater is stopped.
if (NodeStatusUpdaterImpl.class.getName().equals(service.getName())
&& STATE.STOPPED.equals(service.getServiceState())) {
+
+ boolean hasToReboot = ((NodeStatusUpdaterImpl) service).hasToRebootNode();
+
+ // Shutdown the Nodemanager when the NodeStatusUpdater is stopped.
stop();
+
+ // Reboot the whole node-manager if NodeStatusUpdater got a reboot command
+ // from the RM.
+ if (hasToReboot) {
+ LOG.info("Rebooting the node manager.");
+ NodeManager nodeManager = createNewNodeManager();
+ nodeManager.initAndStartNodeManager(hasToReboot);
+ }
}
}
- public static void main(String[] args) {
- StringUtils.startupShutdownMessage(NodeManager.class, args, LOG);
+ private void initAndStartNodeManager(boolean hasToReboot) {
try {
- NodeManager nodeManager = new NodeManager();
- Runtime.getRuntime().addShutdownHook(
- new CompositeServiceShutdownHook(nodeManager));
+
+ // Remove the old hook if we are rebooting.
+ if (hasToReboot && null != nodeManagerShutdownHook) {
+ Runtime.getRuntime().removeShutdownHook(nodeManagerShutdownHook);
+ }
+
+ nodeManagerShutdownHook = new CompositeServiceShutdownHook(this);
+ Runtime.getRuntime().addShutdownHook(nodeManagerShutdownHook);
+
YarnConfiguration conf = new YarnConfiguration();
- nodeManager.init(conf);
- nodeManager.start();
+ this.init(conf);
+ this.start();
} catch (Throwable t) {
LOG.fatal("Error starting NodeManager", t);
System.exit(-1);
}
}
+
+ // For testing
+ NodeManager createNewNodeManager() {
+ return new NodeManager();
+ }
+
+ public static void main(String[] args) {
+ StringUtils.startupShutdownMessage(NodeManager.class, args, LOG);
+ NodeManager nodeManager = new NodeManager();
+ nodeManager.initAndStartNodeManager(false);
+ }
}
Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java Tue Mar 6 00:53:18 2012
@@ -91,6 +91,8 @@ public class NodeStatusUpdaterImpl exten
private final NodeHealthCheckerService healthChecker;
private final NodeManagerMetrics metrics;
+ private boolean hasToRebootNode;
+
public NodeStatusUpdaterImpl(Context context, Dispatcher dispatcher,
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics,
ContainerTokenSecretManager containerTokenSecretManager) {
@@ -156,6 +158,18 @@ public class NodeStatusUpdaterImpl exten
this.isStopped = true;
super.stop();
}
+
+ private synchronized void reboot() {
+ this.hasToRebootNode = true;
+ // Stop the status-updater. This will trigger a sub-service state change in
+ // the NodeManager which will then decide to reboot or not based on
+ // hasToRebootNode().
+ this.stop();
+ }
+
+ synchronized boolean hasToRebootNode() {
+ return this.hasToRebootNode;
+ }
protected boolean isSecurityEnabled() {
return UserGroupInformation.isSecurityEnabled();
@@ -336,8 +350,8 @@ public class NodeStatusUpdaterImpl exten
}
if (response.getNodeAction() == NodeAction.REBOOT) {
LOG.info("Node is out of sync with ResourceManager,"
- + " hence shutting down.");
- NodeStatusUpdaterImpl.this.stop();
+ + " hence rebooting.");
+ NodeStatusUpdaterImpl.this.reboot();
break;
}
Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java Tue Mar 6 00:53:18 2012
@@ -18,6 +18,8 @@
package org.apache.hadoop.yarn.server.nodemanager;
+import static org.mockito.Mockito.mock;
+
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
@@ -71,7 +73,6 @@ import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import static org.mockito.Mockito.mock;
public class TestNodeStatusUpdater {
@@ -91,6 +92,7 @@ public class TestNodeStatusUpdater {
private final List<NodeId> registeredNodes = new ArrayList<NodeId>();
private final Configuration conf = new YarnConfiguration();
private NodeManager nm;
+ protected NodeManager rebootedNodeManager;
@After
public void tearDown() {
@@ -496,8 +498,28 @@ public class TestNodeStatusUpdater {
LOG.info("Waiting for NM to stop..");
Thread.sleep(1000);
}
-
Assert.assertEquals(STATE.STOPPED, nm.getServiceState());
+
+ waitCount = 0;
+ while (null == rebootedNodeManager && waitCount++ != 20) {
+ LOG.info("Waiting for NM to reinitialize..");
+ Thread.sleep(1000);
+ }
+
+ waitCount = 0;
+ while (rebootedNodeManager.getServiceState() != STATE.STARTED && waitCount++ != 20) {
+ LOG.info("Waiting for NM to start..");
+ Thread.sleep(1000);
+ }
+ Assert.assertEquals(STATE.STARTED, rebootedNodeManager.getServiceState());
+
+ rebootedNodeManager.stop();
+ waitCount = 0;
+ while (rebootedNodeManager.getServiceState() != STATE.STOPPED && waitCount++ != 20) {
+ LOG.info("Waiting for NM to stop..");
+ Thread.sleep(1000);
+ }
+ Assert.assertEquals(STATE.STOPPED, rebootedNodeManager.getServiceState());
}
@Test
@@ -642,6 +664,12 @@ public class TestNodeStatusUpdater {
myNodeStatusUpdater.resourceTracker = myResourceTracker2;
return myNodeStatusUpdater;
}
+
+ @Override
+ NodeManager createNewNodeManager() {
+ rebootedNodeManager = getNodeManager(NodeAction.NORMAL);
+ return rebootedNodeManager;
+ }
};
}
}