You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by ac...@apache.org on 2012/03/06 01:53:18 UTC

svn commit: r1297312 - in /hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project: ./ hadoop-yarn/dev-support/ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ hadoop-yarn/had...

Author: acmurthy
Date: Tue Mar  6 00:53:18 2012
New Revision: 1297312

URL: http://svn.apache.org/viewvc?rev=1297312&view=rev
Log:
Merge -c 1297310 from trunk to branch-0.23 to fix MAPREDUCE-3034. Ensure NodeManager reboots itself on direction from ResourceManager. Contributed by Devaraj K & Eric Payne.

Modified:
    hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml
    hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
    hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
    hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java

Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/CHANGES.txt Tue Mar  6 00:53:18 2012
@@ -142,6 +142,9 @@ Release 0.23.2 - UNRELEASED
     MAPREDUCE-3964. ResourceManager does not have JVM metrics (Jason Lowe via
     bobby)
 
+    MAPREDUCE-3034. Ensure NodeManager reboots itself on direction from
+    ResourceManager. (Devaraj K & Eric Payne via acmurthy) 
+
 Release 0.23.1 - 2012-02-17
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml Tue Mar  6 00:53:18 2012
@@ -198,8 +198,12 @@
     <Method name="run" />
     <Bug pattern="DM_EXIT" />
   </Match>
+  <Match>
+    <Class name="org.apache.hadoop.yarn.server.nodemanager.NodeManager" />
+    <Method name="initAndStartNodeManager" />
+    <Bug pattern="DM_EXIT" />
+  </Match>
  
-
   <!-- Ignore heartbeat exception when killing localizer -->
   <Match>
     <Class name="org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer" />

Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Tue Mar  6 00:53:18 2012
@@ -60,7 +60,8 @@ public class NodeManager extends Composi
   private ApplicationACLsManager aclsManager;
   private NodeHealthCheckerService nodeHealthChecker;
   private LocalDirsHandlerService dirsHandler;
-
+  private static CompositeServiceShutdownHook nodeManagerShutdownHook; 
+  
   public NodeManager() {
     super(NodeManager.class.getName());
   }
@@ -226,25 +227,52 @@ public class NodeManager extends Composi
 
   @Override
   public void stateChanged(Service service) {
-    // Shutdown the Nodemanager when the NodeStatusUpdater is stopped.
     if (NodeStatusUpdaterImpl.class.getName().equals(service.getName())
         && STATE.STOPPED.equals(service.getServiceState())) {
+
+      boolean hasToReboot = ((NodeStatusUpdaterImpl) service).hasToRebootNode();
+
+      // Shut down the NodeManager when the NodeStatusUpdater is stopped.
       stop();
+
+      // Reboot the whole node-manager if NodeStatusUpdater got a reboot command
+      // from the RM.
+      if (hasToReboot) {
+        LOG.info("Rebooting the node manager.");
+        NodeManager nodeManager = createNewNodeManager();
+        nodeManager.initAndStartNodeManager(hasToReboot);
+      }
     }
   }
   
-  public static void main(String[] args) {
-    StringUtils.startupShutdownMessage(NodeManager.class, args, LOG);
+  private void initAndStartNodeManager(boolean hasToReboot) {
     try {
-      NodeManager nodeManager = new NodeManager();
-      Runtime.getRuntime().addShutdownHook(
-          new CompositeServiceShutdownHook(nodeManager));
+
+      // Remove the old hook if we are rebooting.
+      if (hasToReboot && null != nodeManagerShutdownHook) {
+        Runtime.getRuntime().removeShutdownHook(nodeManagerShutdownHook);
+      }
+
+      nodeManagerShutdownHook = new CompositeServiceShutdownHook(this);
+      Runtime.getRuntime().addShutdownHook(nodeManagerShutdownHook);
+
       YarnConfiguration conf = new YarnConfiguration();
-      nodeManager.init(conf);
-      nodeManager.start();
+      this.init(conf);
+      this.start();
     } catch (Throwable t) {
       LOG.fatal("Error starting NodeManager", t);
       System.exit(-1);
     }
   }
+
+  // For testing
+  NodeManager createNewNodeManager() {
+    return new NodeManager();
+  }
+
+  public static void main(String[] args) {
+    StringUtils.startupShutdownMessage(NodeManager.class, args, LOG);
+    NodeManager nodeManager = new NodeManager();
+    nodeManager.initAndStartNodeManager(false);
+  }
 }

Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java Tue Mar  6 00:53:18 2012
@@ -91,6 +91,8 @@ public class NodeStatusUpdaterImpl exten
   private final NodeHealthCheckerService healthChecker;
   private final NodeManagerMetrics metrics;
 
+  private boolean hasToRebootNode;
+  
   public NodeStatusUpdaterImpl(Context context, Dispatcher dispatcher,
       NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics, 
       ContainerTokenSecretManager containerTokenSecretManager) {
@@ -156,6 +158,18 @@ public class NodeStatusUpdaterImpl exten
     this.isStopped = true;
     super.stop();
   }
+  
+  private synchronized void reboot() {
+    this.hasToRebootNode = true;
+    // Stop the status-updater. This will trigger a sub-service state change in
+    // the NodeManager which will then decide to reboot or not based on
+    // hasToRebootNode.
+    this.stop();
+  }
+
+  synchronized boolean hasToRebootNode() {
+    return this.hasToRebootNode;
+  }
 
   protected boolean isSecurityEnabled() {
     return UserGroupInformation.isSecurityEnabled();
@@ -336,8 +350,8 @@ public class NodeStatusUpdaterImpl exten
             }
             if (response.getNodeAction() == NodeAction.REBOOT) {
               LOG.info("Node is out of sync with ResourceManager,"
-                  + " hence shutting down.");
-              NodeStatusUpdaterImpl.this.stop();
+                  + " hence rebooting.");
+              NodeStatusUpdaterImpl.this.reboot();
               break;
             }
 

Modified: hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java?rev=1297312&r1=1297311&r2=1297312&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java (original)
+++ hadoop/common/branches/branch-0.23.2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java Tue Mar  6 00:53:18 2012
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.yarn.server.nodemanager;
 
+import static org.mockito.Mockito.mock;
+
 import java.io.IOException;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
@@ -71,7 +73,6 @@ import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
-import static org.mockito.Mockito.mock;
 
 public class TestNodeStatusUpdater {
 
@@ -91,6 +92,7 @@ public class TestNodeStatusUpdater {
   private final List<NodeId> registeredNodes = new ArrayList<NodeId>();
   private final Configuration conf = new YarnConfiguration();
   private NodeManager nm;
+  protected NodeManager rebootedNodeManager;
 
   @After
   public void tearDown() {
@@ -496,8 +498,28 @@ public class TestNodeStatusUpdater {
       LOG.info("Waiting for NM to stop..");
       Thread.sleep(1000);
     }
-
     Assert.assertEquals(STATE.STOPPED, nm.getServiceState());
+    
+    waitCount = 0;
+    while (null == rebootedNodeManager && waitCount++ != 20) {
+      LOG.info("Waiting for NM to reinitialize..");
+      Thread.sleep(1000);
+    }
+      
+    waitCount = 0;
+    while (rebootedNodeManager.getServiceState() != STATE.STARTED && waitCount++ != 20) {
+      LOG.info("Waiting for NM to start..");
+      Thread.sleep(1000);
+    }
+    Assert.assertEquals(STATE.STARTED, rebootedNodeManager.getServiceState());
+
+    rebootedNodeManager.stop();
+    waitCount = 0;
+    while (rebootedNodeManager.getServiceState() != STATE.STOPPED && waitCount++ != 20) {
+      LOG.info("Waiting for NM to stop..");
+      Thread.sleep(1000);
+    }
+    Assert.assertEquals(STATE.STOPPED, rebootedNodeManager.getServiceState());
   }
   
   @Test
@@ -642,6 +664,12 @@ public class TestNodeStatusUpdater {
         myNodeStatusUpdater.resourceTracker = myResourceTracker2;
         return myNodeStatusUpdater;
       }
+
+      @Override
+      NodeManager createNewNodeManager() {
+        rebootedNodeManager = getNodeManager(NodeAction.NORMAL);
+        return rebootedNodeManager;
+      }
     };
   }
 }