You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by ka...@apache.org on 2014/04/08 19:15:58 UTC

svn commit: r1585783 - in /hadoop/common/trunk/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ hadoop-yarn/hadoop-yarn-common/sr...

Author: kasha
Date: Tue Apr  8 17:15:58 2014
New Revision: 1585783

URL: http://svn.apache.org/r1585783
Log:
YARN-1757. NM Recovery. Auxiliary service support. (Jason Lowe via kasha)

Modified:
    hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java
    hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java

Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Tue Apr  8 17:15:58 2014
@@ -21,6 +21,8 @@ Release 2.5.0 - UNRELEASED
 
   NEW FEATURES
 
+    YARN-1757. NM Recovery. Auxiliary service support. (Jason Lowe via kasha)
+
   IMPROVEMENTS
 
     YARN-1479. Invalid NaN values in Hadoop REST API JSON response (Chen He via

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java Tue Apr  8 17:15:58 2014
@@ -884,6 +884,13 @@ public class YarnConfiguration extends C
   
   public static final String DEFAULT_NM_USER_HOME_DIR= "/home/";
 
+  public static final String NM_RECOVERY_PREFIX = NM_PREFIX + "recovery.";
+  public static final String NM_RECOVERY_ENABLED =
+      NM_RECOVERY_PREFIX + "enabled";
+  public static final boolean DEFAULT_NM_RECOVERY_ENABLED = false;
+
+  public static final String NM_RECOVERY_DIR = NM_RECOVERY_PREFIX + "dir";
+
   ////////////////////////////////
   // Web Proxy Configs
   ////////////////////////////////

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java Tue Apr  8 17:15:58 2014
@@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
 
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
@@ -38,11 +39,22 @@ import org.apache.hadoop.yarn.conf.YarnC
 @Evolving
 public abstract class AuxiliaryService extends AbstractService {
 
+  private Path recoveryPath = null;
+
   protected AuxiliaryService(String name) {
     super(name);
   }
 
   /**
+   * Get the path specific to this auxiliary service to use for recovery.
+   *
+   * @return state storage path or null if recovery is not enabled
+   */
+  protected Path getRecoveryPath() {
+    return recoveryPath;
+  }
+
+  /**
    * A new application is started on this NodeManager. This is a signal to
    * this {@link AuxiliaryService} about the application initialization.
    * 
@@ -102,4 +114,13 @@ public abstract class AuxiliaryService e
   public void stopContainer(ContainerTerminationContext stopContainerContext) {
   }
 
+  /**
+   * Set the path for this auxiliary service to use for storing state
+   * that will be used during recovery.
+   *
+   * @param recoveryPath where recoverable state should be stored
+   */
+  public void setRecoveryPath(Path recoveryPath) {
+    this.recoveryPath = recoveryPath;
+  }
 }

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml Tue Apr  8 17:15:58 2014
@@ -1019,6 +1019,19 @@
   	<value>500</value>
   </property>
   
+  <property>
+    <description>Enable the node manager to recover its stored state after it is restarted.</description>
+    <name>yarn.nodemanager.recovery.enabled</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <description>The local filesystem directory in which the node manager will
+    store state when recovery is enabled.</description>
+    <name>yarn.nodemanager.recovery.dir</name>
+    <value>${hadoop.tmp.dir}/yarn-nm-recovery</value>
+  </property>
+
   <!--Map Reduce configuration-->
   <property>
     <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Tue Apr  8 17:15:58 2014
@@ -28,6 +28,9 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.service.CompositeService;
@@ -127,6 +130,20 @@ public class NodeManager extends Composi
 
     conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);
 
+    boolean recoveryEnabled = conf.getBoolean(
+        YarnConfiguration.NM_RECOVERY_ENABLED,
+        YarnConfiguration.DEFAULT_NM_RECOVERY_ENABLED);
+    if (recoveryEnabled) {
+      FileSystem recoveryFs = FileSystem.getLocal(conf);
+      String recoveryDirName = conf.get(YarnConfiguration.NM_RECOVERY_DIR);
+      if (recoveryDirName == null) {
+        throw new IllegalArgumentException("Recovery is enabled but " +
+            YarnConfiguration.NM_RECOVERY_DIR + " is not set.");
+      }
+      Path recoveryRoot = new Path(recoveryDirName);
+      recoveryFs.mkdirs(recoveryRoot, new FsPermission((short)0700));
+    }
+
     NMContainerTokenSecretManager containerTokenSecretManager =
         new NMContainerTokenSecretManager(conf);
 

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java Tue Apr  8 17:15:58 2014
@@ -29,15 +29,18 @@ import java.util.regex.Pattern;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.service.Service;
 import org.apache.hadoop.service.ServiceStateChangeListener;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.server.api.ApplicationInitializationContext;
 import org.apache.hadoop.yarn.server.api.ApplicationTerminationContext;
 import org.apache.hadoop.yarn.server.api.AuxiliaryService;
-import org.apache.hadoop.yarn.server.api.ApplicationInitializationContext;
 import org.apache.hadoop.yarn.server.api.ContainerInitializationContext;
 import org.apache.hadoop.yarn.server.api.ContainerTerminationContext;
 
@@ -46,6 +49,8 @@ import com.google.common.base.Preconditi
 public class AuxServices extends AbstractService
     implements ServiceStateChangeListener, EventHandler<AuxServicesEvent> {
 
+  static final String STATE_STORE_ROOT_NAME = "nm-aux-services";
+
   private static final Log LOG = LogFactory.getLog(AuxServices.class);
 
   protected final Map<String,AuxiliaryService> serviceMap;
@@ -91,6 +96,17 @@ public class AuxServices extends Abstrac
 
   @Override
   public void serviceInit(Configuration conf) throws Exception {
+    final FsPermission storeDirPerms = new FsPermission((short)0700);
+    Path stateStoreRoot = null;
+    FileSystem stateStoreFs = null;
+    boolean recoveryEnabled = conf.getBoolean(
+        YarnConfiguration.NM_RECOVERY_ENABLED,
+        YarnConfiguration.DEFAULT_NM_RECOVERY_ENABLED);
+    if (recoveryEnabled) {
+      stateStoreRoot = new Path(conf.get(YarnConfiguration.NM_RECOVERY_DIR),
+          STATE_STORE_ROOT_NAME);
+      stateStoreFs = FileSystem.getLocal(conf);
+    }
     Collection<String> auxNames = conf.getStringCollection(
         YarnConfiguration.NM_AUX_SERVICES);
     for (final String sName : auxNames) {
@@ -119,6 +135,11 @@ public class AuxServices extends Abstrac
                   +"the name in the config.");
         }
         addService(sName, s);
+        if (recoveryEnabled) {
+          Path storePath = new Path(stateStoreRoot, sName);
+          stateStoreFs.mkdirs(storePath, storeDirPerms);
+          s.setRecoveryPath(storePath);
+        }
         s.init(conf);
       } catch (RuntimeException e) {
         LOG.fatal("Failed to initialize " + sName, e);

Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java Tue Apr  8 17:15:58 2014
@@ -26,6 +26,8 @@ import static org.junit.Assert.assertNul
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.io.File;
+import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -36,6 +38,10 @@ import org.junit.Assert;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.service.Service;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -56,6 +62,10 @@ import org.junit.Test;
 
 public class TestAuxServices {
   private static final Log LOG = LogFactory.getLog(TestAuxServices.class);
+  private static final File TEST_DIR = new File(
+      System.getProperty("test.build.data",
+          System.getProperty("java.io.tmpdir")),
+      TestAuxServices.class.getName());
 
   static class LightService extends AuxiliaryService implements Service
        {
@@ -319,4 +329,81 @@ public class TestAuxServices {
           "should only contain a-zA-Z0-9_ and can not start with numbers"));
     }
   }
+
+  @Test
+  public void testAuxServiceRecoverySetup() throws IOException {
+    Configuration conf = new YarnConfiguration();
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+    conf.set(YarnConfiguration.NM_RECOVERY_DIR, TEST_DIR.toString());
+    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
+        new String[] { "Asrv", "Bsrv" });
+    conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Asrv"),
+        RecoverableServiceA.class, Service.class);
+    conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Bsrv"),
+        RecoverableServiceB.class, Service.class);
+    try {
+      final AuxServices aux = new AuxServices();
+      aux.init(conf);
+      Assert.assertEquals(2, aux.getServices().size());
+      File auxStorageDir = new File(TEST_DIR,
+          AuxServices.STATE_STORE_ROOT_NAME);
+      Assert.assertEquals(2, auxStorageDir.listFiles().length);
+      aux.close();
+    } finally {
+      FileUtil.fullyDelete(TEST_DIR);
+    }
+  }
+
+  static class RecoverableAuxService extends AuxiliaryService {
+    static final FsPermission RECOVERY_PATH_PERMS =
+        new FsPermission((short)0700);
+    // Name expected to appear in this service's recovery path.
+    String auxName;
+
+    RecoverableAuxService(String name, String auxName) {
+      super(name);
+      this.auxName = auxName;
+    }
+    // Verifies the recovery path is set, exists and is owner-only at init.
+    @Override
+    protected void serviceInit(Configuration conf) throws Exception {
+      super.serviceInit(conf);
+      Path storagePath = getRecoveryPath();
+      Assert.assertNotNull("Recovery path not present when aux service inits",
+          storagePath);
+      Assert.assertTrue(storagePath.toString().contains(auxName));
+      FileSystem fs = FileSystem.getLocal(conf);
+      Assert.assertTrue("Recovery path does not exist",
+          fs.exists(storagePath));
+      Assert.assertEquals("Recovery path has wrong permissions",
+          RECOVERY_PATH_PERMS,
+          fs.getFileStatus(storagePath).getPermission());
+    }
+
+    @Override
+    public void initializeApplication(
+        ApplicationInitializationContext initAppContext) {
+    }
+
+    @Override
+    public void stopApplication(ApplicationTerminationContext stopAppContext) {
+    }
+
+    @Override
+    public ByteBuffer getMetaData() {
+      return null;
+    }
+  }
+
+  static class RecoverableServiceA extends RecoverableAuxService {
+    RecoverableServiceA() {
+      super("RecoverableServiceA", "Asrv");
+    }
+  }
+
+  static class RecoverableServiceB extends RecoverableAuxService {
+    RecoverableServiceB() {
+      super("RecoverableServiceB", "Bsrv");
+    }
+  }
 }