You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by ka...@apache.org on 2014/04/08 19:15:58 UTC
svn commit: r1585783 - in /hadoop/common/trunk/hadoop-yarn-project: ./
hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/
hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/
hadoop-yarn/hadoop-yarn-common/sr...
Author: kasha
Date: Tue Apr 8 17:15:58 2014
New Revision: 1585783
URL: http://svn.apache.org/r1585783
Log:
YARN-1757. NM Recovery. Auxiliary service support. (Jason Lowe via kasha)
Modified:
hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java
Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Tue Apr 8 17:15:58 2014
@@ -21,6 +21,8 @@ Release 2.5.0 - UNRELEASED
NEW FEATURES
+ YARN-1757. NM Recovery. Auxiliary service support. (Jason Lowe via kasha)
+
IMPROVEMENTS
YARN-1479. Invalid NaN values in Hadoop REST API JSON response (Chen He via
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java Tue Apr 8 17:15:58 2014
@@ -884,6 +884,13 @@ public class YarnConfiguration extends C
public static final String DEFAULT_NM_USER_HOME_DIR= "/home/";
+ public static final String NM_RECOVERY_PREFIX = NM_PREFIX + "recovery.";
+ public static final String NM_RECOVERY_ENABLED =
+ NM_RECOVERY_PREFIX + "enabled";
+ public static final boolean DEFAULT_NM_RECOVERY_ENABLED = false;
+
+ public static final String NM_RECOVERY_DIR = NM_RECOVERY_PREFIX + "dir";
+
////////////////////////////////
// Web Proxy Configs
////////////////////////////////
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/AuxiliaryService.java Tue Apr 8 17:15:58 2014
@@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
@@ -38,11 +39,22 @@ import org.apache.hadoop.yarn.conf.YarnC
@Evolving
public abstract class AuxiliaryService extends AbstractService {
+ private Path recoveryPath = null;
+
protected AuxiliaryService(String name) {
super(name);
}
/**
+ * Get the path specific to this auxiliary service to use for recovery.
+ *
+ * @return state storage path or null if recovery is not enabled
+ */
+ protected Path getRecoveryPath() {
+ return recoveryPath;
+ }
+
+ /**
* A new application is started on this NodeManager. This is a signal to
* this {@link AuxiliaryService} about the application initialization.
*
@@ -102,4 +114,13 @@ public abstract class AuxiliaryService e
public void stopContainer(ContainerTerminationContext stopContainerContext) {
}
+ /**
+ * Set the path for this auxiliary service to use for storing state
+ * that will be used during recovery.
+ *
+ * @param recoveryPath where recoverable state should be stored
+ */
+ public void setRecoveryPath(Path recoveryPath) {
+ this.recoveryPath = recoveryPath;
+ }
}
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml Tue Apr 8 17:15:58 2014
@@ -1019,6 +1019,19 @@
<value>500</value>
</property>
+ <property>
+ <description>Enable the node manager to recover after starting</description>
+ <name>yarn.nodemanager.recovery.enabled</name>
+ <value>false</value>
+ </property>
+
+ <property>
+ <description>The local filesystem directory in which the node manager will
+ store state when recovery is enabled.</description>
+ <name>yarn.nodemanager.recovery.dir</name>
+ <value>${hadoop.tmp.dir}/yarn-nm-recovery</value>
+ </property>
+
<!--Map Reduce configuration-->
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Tue Apr 8 17:15:58 2014
@@ -28,6 +28,9 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.service.CompositeService;
@@ -127,6 +130,20 @@ public class NodeManager extends Composi
conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);
+ boolean recoveryEnabled = conf.getBoolean(
+ YarnConfiguration.NM_RECOVERY_ENABLED,
+ YarnConfiguration.DEFAULT_NM_RECOVERY_ENABLED);
+ if (recoveryEnabled) {
+ FileSystem recoveryFs = FileSystem.getLocal(conf);
+ String recoveryDirName = conf.get(YarnConfiguration.NM_RECOVERY_DIR);
+ if (recoveryDirName == null) {
+ throw new IllegalArgumentException("Recovery is enabled but " +
+ YarnConfiguration.NM_RECOVERY_DIR + " is not set.");
+ }
+ Path recoveryRoot = new Path(recoveryDirName);
+ recoveryFs.mkdirs(recoveryRoot, new FsPermission((short)0700));
+ }
+
NMContainerTokenSecretManager containerTokenSecretManager =
new NMContainerTokenSecretManager(conf);
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java Tue Apr 8 17:15:58 2014
@@ -29,15 +29,18 @@ import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.service.ServiceStateChangeListener;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.server.api.ApplicationInitializationContext;
import org.apache.hadoop.yarn.server.api.ApplicationTerminationContext;
import org.apache.hadoop.yarn.server.api.AuxiliaryService;
-import org.apache.hadoop.yarn.server.api.ApplicationInitializationContext;
import org.apache.hadoop.yarn.server.api.ContainerInitializationContext;
import org.apache.hadoop.yarn.server.api.ContainerTerminationContext;
@@ -46,6 +49,8 @@ import com.google.common.base.Preconditi
public class AuxServices extends AbstractService
implements ServiceStateChangeListener, EventHandler<AuxServicesEvent> {
+ static final String STATE_STORE_ROOT_NAME = "nm-aux-services";
+
private static final Log LOG = LogFactory.getLog(AuxServices.class);
protected final Map<String,AuxiliaryService> serviceMap;
@@ -91,6 +96,17 @@ public class AuxServices extends Abstrac
@Override
public void serviceInit(Configuration conf) throws Exception {
+ final FsPermission storeDirPerms = new FsPermission((short)0700);
+ Path stateStoreRoot = null;
+ FileSystem stateStoreFs = null;
+ boolean recoveryEnabled = conf.getBoolean(
+ YarnConfiguration.NM_RECOVERY_ENABLED,
+ YarnConfiguration.DEFAULT_NM_RECOVERY_ENABLED);
+ if (recoveryEnabled) {
+ stateStoreRoot = new Path(conf.get(YarnConfiguration.NM_RECOVERY_DIR),
+ STATE_STORE_ROOT_NAME);
+ stateStoreFs = FileSystem.getLocal(conf);
+ }
Collection<String> auxNames = conf.getStringCollection(
YarnConfiguration.NM_AUX_SERVICES);
for (final String sName : auxNames) {
@@ -119,6 +135,11 @@ public class AuxServices extends Abstrac
+"the name in the config.");
}
addService(sName, s);
+ if (recoveryEnabled) {
+ Path storePath = new Path(stateStoreRoot, sName);
+ stateStoreFs.mkdirs(storePath, storeDirPerms);
+ s.setRecoveryPath(storePath);
+ }
s.init(conf);
} catch (RuntimeException e) {
LOG.fatal("Failed to initialize " + sName, e);
Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java?rev=1585783&r1=1585782&r2=1585783&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java (original)
+++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java Tue Apr 8 17:15:58 2014
@@ -26,6 +26,8 @@ import static org.junit.Assert.assertNul
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import java.io.File;
+import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
@@ -36,6 +38,10 @@ import org.junit.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -56,6 +62,10 @@ import org.junit.Test;
public class TestAuxServices {
private static final Log LOG = LogFactory.getLog(TestAuxServices.class);
+ private static final File TEST_DIR = new File(
+ System.getProperty("test.build.data",
+ System.getProperty("java.io.tmpdir")),
+ TestAuxServices.class.getName());
static class LightService extends AuxiliaryService implements Service
{
@@ -319,4 +329,81 @@ public class TestAuxServices {
"should only contain a-zA-Z0-9_ and can not start with numbers"));
}
}
+
+ @Test
+ public void testAuxServiceRecoverySetup() throws IOException {
+ Configuration conf = new YarnConfiguration();
+ conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+ conf.set(YarnConfiguration.NM_RECOVERY_DIR, TEST_DIR.toString());
+ conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
+ new String[] { "Asrv", "Bsrv" });
+ conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Asrv"),
+ RecoverableServiceA.class, Service.class);
+ conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Bsrv"),
+ RecoverableServiceB.class, Service.class);
+ try {
+ final AuxServices aux = new AuxServices();
+ aux.init(conf);
+ Assert.assertEquals(2, aux.getServices().size());
+ File auxStorageDir = new File(TEST_DIR,
+ AuxServices.STATE_STORE_ROOT_NAME);
+ Assert.assertEquals(2, auxStorageDir.listFiles().length);
+ aux.close();
+ } finally {
+ FileUtil.fullyDelete(TEST_DIR);
+ }
+ }
+
+ static class RecoverableAuxService extends AuxiliaryService {
+ static final FsPermission RECOVERY_PATH_PERMS =
+ new FsPermission((short)0700);
+
+ String auxName;
+
+ RecoverableAuxService(String name, String auxName) {
+ super(name);
+ this.auxName = auxName;
+ }
+
+ @Override
+ protected void serviceInit(Configuration conf) throws Exception {
+ super.serviceInit(conf);
+ Path storagePath = getRecoveryPath();
+ Assert.assertNotNull("Recovery path not present when aux service inits",
+ storagePath);
+ Assert.assertTrue(storagePath.toString().contains(auxName));
+ FileSystem fs = FileSystem.getLocal(conf);
+ Assert.assertTrue("Recovery path does not exist",
+ fs.exists(storagePath));
+ Assert.assertEquals("Recovery path has wrong permissions",
+ new FsPermission((short)0700),
+ fs.getFileStatus(storagePath).getPermission());
+ }
+
+ @Override
+ public void initializeApplication(
+ ApplicationInitializationContext initAppContext) {
+ }
+
+ @Override
+ public void stopApplication(ApplicationTerminationContext stopAppContext) {
+ }
+
+ @Override
+ public ByteBuffer getMetaData() {
+ return null;
+ }
+ }
+
+ static class RecoverableServiceA extends RecoverableAuxService {
+ RecoverableServiceA() {
+ super("RecoverableServiceA", "Asrv");
+ }
+ }
+
+ static class RecoverableServiceB extends RecoverableAuxService {
+ RecoverableServiceB() {
+ super("RecoverableServiceB", "Bsrv");
+ }
+ }
}