You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2018/01/18 23:47:47 UTC
[16/49] hadoop git commit: YARN-5366. Improve signal handling and
delete delay for Docker on Yarn. (Contributed by Shane Kumpf)
YARN-5366. Improve signal handling and delete delay for Docker on Yarn.
(Contributed by Shane Kumpf)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3d65dbe0
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3d65dbe0
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3d65dbe0
Branch: refs/heads/YARN-7402
Commit: 3d65dbe032e202361d613344ccc6d9c5f99ba395
Parents: 4fb1f45
Author: Eric Yang <ey...@apache.org>
Authored: Fri Jan 12 13:41:27 2018 -0500
Committer: Eric Yang <ey...@apache.org>
Committed: Fri Jan 12 13:41:27 2018 -0500
----------------------------------------------------------------------
.../hadoop/yarn/conf/YarnConfiguration.java | 14 +
.../src/main/resources/yarn-default.xml | 8 +
.../server/nodemanager/ContainerExecutor.java | 11 +
.../hadoop/yarn/server/nodemanager/Context.java | 7 +
.../nodemanager/DefaultContainerExecutor.java | 12 +
.../nodemanager/LinuxContainerExecutor.java | 58 ++++
.../yarn/server/nodemanager/NodeManager.java | 20 ++
.../nodemanager/api/impl/pb/NMProtoUtils.java | 37 ++-
.../container/ContainerImpl.java | 26 ++
.../deletion/task/DeletionTaskType.java | 2 +-
.../task/DockerContainerDeletionTask.java | 92 ++++++
.../launcher/ContainerLaunch.java | 108 +++++--
.../runtime/DockerLinuxContainerRuntime.java | 182 +++++++++---
.../runtime/docker/DockerCommandExecutor.java | 39 ++-
.../linux/runtime/docker/DockerKillCommand.java | 40 +++
.../executor/ContainerReapContext.java | 93 ++++++
.../impl/container-executor.c | 66 +++--
.../container-executor/impl/utils/docker-util.c | 62 ++++
.../container-executor/impl/utils/docker-util.h | 12 +
.../container-executor/impl/utils/path-utils.c | 14 +
.../container-executor/impl/utils/path-utils.h | 7 +
.../test/utils/test-path-utils.cc | 12 +
.../test/utils/test_docker_util.cc | 26 ++
.../yarn_server_nodemanager_recovery.proto | 1 +
.../nodemanager/TestContainerExecutor.java | 12 +-
.../nodemanager/TestLinuxContainerExecutor.java | 26 ++
.../amrmproxy/BaseAMRMProxyTest.java | 6 +
.../api/impl/pb/TestNMProtoUtils.java | 24 ++
.../container/TestContainer.java | 286 ++++++++++++++++++-
.../task/DockerContainerDeletionMatcher.java | 49 ++++
.../task/TestDockerContainerDeletionTask.java | 65 +++++
.../runtime/TestDockerContainerRuntime.java | 135 ++++++++-
.../docker/TestDockerCommandExecutor.java | 166 +++++++++--
.../runtime/docker/TestDockerKillCommand.java | 61 ++++
.../TestContainersMonitorResourceChange.java | 6 +
.../executor/TestContainerReapContext.java | 53 ++++
.../src/site/markdown/DockerContainers.md | 1 +
37 files changed, 1718 insertions(+), 121 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 1b6bd0e..fdb5859 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1811,6 +1811,20 @@ public class YarnConfiguration extends Configuration {
public static final String DEFAULT_NM_DOCKER_DEFAULT_CONTAINER_NETWORK =
"host";
+ /**
+ * Whether or not users are allowed to request that Docker containers honor
+ * the debug deletion delay. This is useful for troubleshooting Docker
+ * container related launch failures.
+ */
+ public static final String NM_DOCKER_ALLOW_DELAYED_REMOVAL =
+ DOCKER_CONTAINER_RUNTIME_PREFIX + "delayed-removal.allowed";
+
+ /**
+ * The default value on whether or not a user can request that Docker
+ * containers honor the debug deletion delay.
+ */
+ public static final boolean DEFAULT_NM_DOCKER_ALLOW_DELAYED_REMOVAL = false;
+
/** The mode in which the Java Container Sandbox should run detailed by
* the JavaSandboxLinuxContainerRuntime. */
public static final String YARN_CONTAINER_SANDBOX =
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index d450eca..017799a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1697,6 +1697,14 @@
</property>
<property>
+ <description>Whether or not users are allowed to request that Docker
+ containers honor the debug deletion delay. This is useful for
+ troubleshooting Docker container related launch failures.</description>
+ <name>yarn.nodemanager.runtime.linux.docker.delayed-removal.allowed</name>
+ <value>false</value>
+ </property>
+
+ <property>
<description>The mode in which the Java Container Sandbox should run detailed by
the JavaSandboxLinuxContainerRuntime.</description>
<name>yarn.nodemanager.runtime.linux.sandbox-mode</name>
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
index 7ee3e52..f4279a3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerPrepareContex
import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReacquisitionContext;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
@@ -190,6 +191,16 @@ public abstract class ContainerExecutor implements Configurable {
throws IOException;
/**
+ * Perform the steps necessary to reap the container.
+ *
+ * @param ctx Encapsulates information necessary for reaping containers.
+ * @return returns true if the operation succeeded.
+ * @throws IOException if reaping the container fails.
+ */
+ public abstract boolean reapContainer(ContainerReapContext ctx)
+ throws IOException;
+
+ /**
* Delete specified directories as a given user.
*
* @param ctx Encapsulates information necessary for deletion.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
index 6774cf0..d7e3b52 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
@@ -128,4 +128,11 @@ public interface Context {
ResourcePluginManager getResourcePluginManager();
NodeManagerMetrics getNodeManagerMetrics();
+
+ /**
+ * Get the {@code DeletionService} associated with the NM.
+ *
+ * @return the NM {@code DeletionService}.
+ */
+ DeletionService getDeletionService();
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
index 5772403..828b8fd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
@@ -565,6 +566,17 @@ public class DefaultContainerExecutor extends ContainerExecutor {
return true;
}
+ /**
+ * No-op for reaping containers within the DefaultContainerExecutor.
+ *
+ * @param ctx Encapsulates information necessary for reaping containers.
+ * @return true given no operations are needed.
+ */
+ @Override
+ public boolean reapContainer(ContainerReapContext ctx) {
+ return true;
+ }
+
@Override
public boolean isContainerAlive(ContainerLivenessContext ctx)
throws IOException {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index eaf664f..fc5a04d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -44,12 +44,15 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DelegatingLinuxContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntime;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerCommandExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRmCommand;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerPrepareContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReacquisitionContext;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
@@ -731,6 +734,39 @@ public class LinuxContainerExecutor extends ContainerExecutor {
return true;
}
+ /**
+ * Performs the tasks necessary to reap the container.
+ *
+ * @param ctx Encapsulates information necessary for reaping containers.
+ * @return true if the reaping was successful.
+ * @throws IOException if an error occurs while reaping the container.
+ */
+ @Override
+ public boolean reapContainer(ContainerReapContext ctx) throws IOException {
+ Container container = ctx.getContainer();
+ String user = ctx.getUser();
+ String runAsUser = getRunAsUser(user);
+ ContainerRuntimeContext runtimeContext = new ContainerRuntimeContext
+ .Builder(container)
+ .setExecutionAttribute(RUN_AS_USER, runAsUser)
+ .setExecutionAttribute(USER, user)
+ .build();
+ try {
+ linuxContainerRuntime.reapContainer(runtimeContext);
+ } catch (ContainerExecutionException e) {
+ int retCode = e.getExitCode();
+ if (retCode != 0) {
+ return false;
+ }
+ LOG.warn("Error in reaping container "
+ + container.getContainerId().toString() + " exit = " + retCode, e);
+ logOutput(e.getOutput());
+ throw new IOException("Error in reaping container "
+ + container.getContainerId().toString() + " exit = " + retCode, e);
+ }
+ return true;
+ }
+
@Override
public void deleteAsUser(DeletionAsUserContext ctx) {
String user = ctx.getUser();
@@ -875,4 +911,26 @@ public class LinuxContainerExecutor extends ContainerExecutor {
public ResourceHandler getResourceHandler() {
return resourceHandlerChain;
}
+
+ /**
+ * Remove the docker container referenced in the context.
+ *
+ * @param containerId the containerId for the container.
+ */
+ public void removeDockerContainer(String containerId) {
+ try {
+ PrivilegedOperationExecutor privOpExecutor =
+ PrivilegedOperationExecutor.getInstance(super.getConf());
+ if (DockerCommandExecutor.isRemovable(
+ DockerCommandExecutor.getContainerStatus(containerId,
+ super.getConf(), privOpExecutor))) {
+ LOG.info("Removing Docker container : " + containerId);
+ DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId);
+ DockerCommandExecutor.executeDockerCommand(dockerRmCommand, containerId,
+ null, super.getConf(), privOpExecutor, false);
+ }
+ } catch (ContainerExecutionException e) {
+ LOG.warn("Unable to remove docker container: " + containerId);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 6cb8560..5cacd20 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -400,6 +400,7 @@ public class NodeManager extends CompositeService
((NMContext)context).setContainerExecutor(exec);
+ ((NMContext)context).setDeletionService(del);
nodeLabelsProvider = createNodeLabelsProvider(conf);
@@ -611,6 +612,7 @@ public class NodeManager extends CompositeService
logAggregationReportForApps;
private NodeStatusUpdater nodeStatusUpdater;
private final boolean isDistSchedulingEnabled;
+ private DeletionService deletionService;
private OpportunisticContainerAllocator containerAllocator;
@@ -845,6 +847,24 @@ public class NodeManager extends CompositeService
ResourcePluginManager resourcePluginManager) {
this.resourcePluginManager = resourcePluginManager;
}
+
+ /**
+ * Return the NM's {@link DeletionService}.
+ *
+ * @return the NM's {@link DeletionService}.
+ */
+ public DeletionService getDeletionService() {
+ return this.deletionService;
+ }
+
+ /**
+ * Set the NM's {@link DeletionService}.
+ *
+ * @param deletionService the {@link DeletionService} to add to the Context.
+ */
+ public void setDeletionService(DeletionService deletionService) {
+ this.deletionService = deletionService;
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/NMProtoUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/NMProtoUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/NMProtoUtils.java
index f9b762a..dd129f5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/NMProtoUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/NMProtoUtils.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.recovery.DeletionTaskRecoveryInfo;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DeletionTask;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DeletionTaskType;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DockerContainerDeletionTask;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.FileDeletionTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -51,18 +52,29 @@ public final class NMProtoUtils {
int taskId = proto.getId();
if (proto.hasTaskType() && proto.getTaskType() != null) {
if (proto.getTaskType().equals(DeletionTaskType.FILE.name())) {
- LOG.debug("Converting recovered FileDeletionTask");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Converting recovered FileDeletionTask");
+ }
return convertProtoToFileDeletionTask(proto, deletionService, taskId);
+ } else if (proto.getTaskType().equals(
+ DeletionTaskType.DOCKER_CONTAINER.name())) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Converting recovered DockerContainerDeletionTask");
+ }
+ return convertProtoToDockerContainerDeletionTask(proto, deletionService,
+ taskId);
}
}
- LOG.debug("Unable to get task type, trying FileDeletionTask");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Unable to get task type, trying FileDeletionTask");
+ }
return convertProtoToFileDeletionTask(proto, deletionService, taskId);
}
/**
* Convert the Protobuf representation into the {@link FileDeletionTask}.
*
- * @param proto the Protobuf representation of the {@link FileDeletionTask}
+ * @param proto the Protobuf representation of the {@link FileDeletionTask}.
* @param deletionService the {@link DeletionService}.
* @param taskId the ID of the {@link DeletionTask}.
* @return the populated {@link FileDeletionTask}.
@@ -88,6 +100,25 @@ public final class NMProtoUtils {
}
/**
+ * Convert the Protobuf format into the {@link DockerContainerDeletionTask}.
+ *
+ * @param proto Protobuf format of the {@link DockerContainerDeletionTask}.
+ * @param deletionService the {@link DeletionService}.
+ * @param taskId the ID of the {@link DeletionTask}.
+ * @return the populated {@link DockerContainerDeletionTask}.
+ */
+ public static DockerContainerDeletionTask
+ convertProtoToDockerContainerDeletionTask(
+ DeletionServiceDeleteTaskProto proto, DeletionService deletionService,
+ int taskId) {
+ String user = proto.hasUser() ? proto.getUser() : null;
+ String containerId =
+ proto.hasDockerContainerId() ? proto.getDockerContainerId() : null;
+ return new DockerContainerDeletionTask(taskId, deletionService, user,
+ containerId);
+ }
+
+ /**
* Convert the Protobuf representation to the {@link DeletionTaskRecoveryInfo}
* representation.
*
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index f95c2a0..1255316 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -60,13 +60,16 @@ import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerFinishedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DockerContainerDeletionTask;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEventType;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalResourceRequest;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceSet;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationCleanupEvent;
@@ -1512,6 +1515,11 @@ public class ContainerImpl implements Container {
// TODO: Add containerWorkDir to the deletion service.
+ if (DockerLinuxContainerRuntime.isDockerContainerRequested(
+ container.getLaunchContext().getEnvironment())) {
+ removeDockerContainer(container);
+ }
+
if (clCleanupRequired) {
container.dispatcher.getEventHandler().handle(
new ContainersLauncherEvent(container,
@@ -1547,6 +1555,11 @@ public class ContainerImpl implements Container {
// TODO: Add containerWorkDir to the deletion service.
// TODO: Add containerOuputDir to the deletion service.
+ if (DockerLinuxContainerRuntime.isDockerContainerRequested(
+ container.getLaunchContext().getEnvironment())) {
+ removeDockerContainer(container);
+ }
+
if (clCleanupRequired) {
container.dispatcher.getEventHandler().handle(
new ContainersLauncherEvent(container,
@@ -1841,6 +1854,11 @@ public class ContainerImpl implements Container {
container.addDiagnostics(exitEvent.getDiagnosticInfo() + "\n");
}
+ if (DockerLinuxContainerRuntime.isDockerContainerRequested(
+ container.getLaunchContext().getEnvironment())) {
+ removeDockerContainer(container);
+ }
+
// The process/process-grp is killed. Decrement reference counts and
// cleanup resources
container.cleanup();
@@ -2178,4 +2196,12 @@ public class ContainerImpl implements Container {
public ResourceMappings getResourceMappings() {
return resourceMappings;
}
+
+ private static void removeDockerContainer(ContainerImpl container) {
+ DeletionService deletionService = container.context.getDeletionService();
+ DockerContainerDeletionTask deletionTask =
+ new DockerContainerDeletionTask(deletionService, container.user,
+ container.getContainerId().toString());
+ deletionService.delete(deletionTask);
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DeletionTaskType.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DeletionTaskType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DeletionTaskType.java
index 676c71b..9918503 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DeletionTaskType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DeletionTaskType.java
@@ -20,5 +20,5 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task
* Available types of {@link DeletionTask}s.
*/
public enum DeletionTaskType {
- FILE
+ FILE, DOCKER_CONTAINER
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java
new file mode 100644
index 0000000..70b918a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task;
+
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto;
+import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor;
+
+/**
+ * {@link DeletionTask} handling the removal of Docker containers.
+ */
+public class DockerContainerDeletionTask extends DeletionTask
+ implements Runnable {
+ private String containerId;
+
+ public DockerContainerDeletionTask(DeletionService deletionService,
+ String user, String containerId) {
+ this(INVALID_TASK_ID, deletionService, user, containerId);
+ }
+
+ public DockerContainerDeletionTask(int taskId,
+ DeletionService deletionService, String user, String containerId) {
+ super(taskId, deletionService, user, DeletionTaskType.DOCKER_CONTAINER);
+ this.containerId = containerId;
+ }
+
+ /**
+ * Get the id of the container to delete.
+ *
+ * @return the id of the container to delete.
+ */
+ public String getContainerId() {
+ return containerId;
+ }
+
+ /**
+ * Delete the specified Docker container.
+ */
+ @Override
+ public void run() {
+ if (LOG.isDebugEnabled()) {
+ String msg = String.format("Running DeletionTask : %s", toString());
+ LOG.debug(msg);
+ }
+ LinuxContainerExecutor exec = ((LinuxContainerExecutor)
+ getDeletionService().getContainerExecutor());
+ exec.removeDockerContainer(containerId);
+ }
+
+ /**
+ * Convert the DockerContainerDeletionTask to a String representation.
+ *
+ * @return String representation of the DockerContainerDeletionTask.
+ */
+ @Override
+ public String toString() {
+ StringBuffer sb = new StringBuffer("DockerContainerDeletionTask : ");
+ sb.append(" id : ").append(this.getTaskId());
+ sb.append(" containerId : ").append(this.containerId);
+ return sb.toString().trim();
+ }
+
+ /**
+ * Convert the DockerContainerDeletionTask to the Protobuf representation for
+ * storing in the state store and recovery.
+ *
+ * @return the protobuf representation of the DockerContainerDeletionTask.
+ */
+ public DeletionServiceDeleteTaskProto convertDeletionTaskToProto() {
+ DeletionServiceDeleteTaskProto.Builder builder =
+ getBaseDeletionTaskProtoBuilder();
+ builder.setTaskType(DeletionTaskType.DOCKER_CONTAINER.name());
+ if (getContainerId() != null) {
+ builder.setDockerContainerId(getContainerId());
+ }
+ return builder.build();
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
index db90215..95ced22 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
@@ -77,9 +77,11 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerPrepareContext;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
import org.apache.hadoop.yarn.server.nodemanager.util.ProcessIdFileReader;
@@ -704,32 +706,9 @@ public class ContainerLaunch implements Callable<Integer> {
}
// kill process
+ String user = container.getUser();
if (processId != null) {
- String user = container.getUser();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Sending signal to pid " + processId + " as user " + user
- + " for container " + containerIdStr);
- }
- final Signal signal = sleepDelayBeforeSigKill > 0
- ? Signal.TERM
- : Signal.KILL;
-
- boolean result = exec.signalContainer(
- new ContainerSignalContext.Builder()
- .setContainer(container)
- .setUser(user)
- .setPid(processId)
- .setSignal(signal)
- .build());
- if (LOG.isDebugEnabled()) {
- LOG.debug("Sent signal " + signal + " to pid " + processId
- + " as user " + user + " for container " + containerIdStr
- + ", result=" + (result ? "success" : "failed"));
- }
- if (sleepDelayBeforeSigKill > 0) {
- new DelayedProcessKiller(container, user,
- processId, sleepDelayBeforeSigKill, Signal.KILL, exec).start();
- }
+ signalProcess(processId, user, containerIdStr);
} else {
// Normally this means that the process was notified about
// deactivateContainer above and did not start.
@@ -750,6 +729,11 @@ public class ContainerLaunch implements Callable<Integer> {
// Increasing YarnConfiguration.NM_PROCESS_KILL_WAIT_MS
// reduces the likelihood of this race condition and process leak.
}
+ // The Docker container may not have fully started, reap the container.
+ if (DockerLinuxContainerRuntime.isDockerContainerRequested(
+ container.getLaunchContext().getEnvironment())) {
+ reapDockerContainerNoPid(user);
+ }
}
} catch (Exception e) {
String message =
@@ -766,6 +750,36 @@ public class ContainerLaunch implements Callable<Integer> {
lfs.delete(pidFilePath.suffix(EXIT_CODE_FILE_SUFFIX), false);
}
}
+
+ final int sleepMsec = 100;
+ int msecLeft = 2000;
+ if (pidFilePath != null) {
+ File file = new File(getExitCodeFile(pidFilePath.toString()));
+ while (!file.exists() && msecLeft >= 0) {
+ try {
+ Thread.sleep(sleepMsec);
+ } catch (InterruptedException e) {
+ }
+ msecLeft -= sleepMsec;
+ }
+ if (msecLeft < 0) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Timeout while waiting for the exit code file: "
+ + file.getAbsolutePath());
+ }
+ }
+ }
+
+ // Reap the container
+ boolean result = exec.reapContainer(
+ new ContainerReapContext.Builder()
+ .setContainer(container)
+ .setUser(container.getUser())
+ .build());
+ if (!result) {
+ throw new IOException("Reap container failed for container "
+ + containerIdStr);
+ }
}
/**
@@ -844,6 +858,50 @@ public class ContainerLaunch implements Callable<Integer> {
}
}
+ private boolean sendSignal(String user, String processId, Signal signal)
+ throws IOException {
+ return exec.signalContainer(
+ new ContainerSignalContext.Builder().setContainer(container)
+ .setUser(user).setPid(processId).setSignal(signal).build());
+ }
+
+ private void signalProcess(String processId, String user,
+ String containerIdStr) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Sending signal to pid " + processId + " as user " + user
+ + " for container " + containerIdStr);
+ }
+ final Signal signal =
+ sleepDelayBeforeSigKill > 0 ? Signal.TERM : Signal.KILL;
+
+ boolean result = sendSignal(user, processId, signal);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Sent signal " + signal + " to pid " + processId + " as user "
+ + user + " for container " + containerIdStr + ", result="
+ + (result ? "success" : "failed"));
+ }
+ if (sleepDelayBeforeSigKill > 0) {
+ new DelayedProcessKiller(container, user, processId,
+ sleepDelayBeforeSigKill, Signal.KILL, exec).start();
+ }
+ }
+
+ private void reapDockerContainerNoPid(String user) throws IOException {
+ String containerIdStr =
+ container.getContainerTokenIdentifier().getContainerID().toString();
+ LOG.info("Unable to obtain pid, but docker container request detected. "
+ + "Attempting to reap container " + containerIdStr);
+ boolean result = exec.reapContainer(
+ new ContainerReapContext.Builder()
+ .setContainer(container)
+ .setUser(container.getUser())
+ .build());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Sent signal to docker container " + containerIdStr
+ + " as user " + user + ", result=" + (result ? "success" : "failed"));
+ }
+ }
+
@VisibleForTesting
public static Signal translateCommandToSignal(
SignalContainerCommand command) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
index 20359ea..6799ce2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
@@ -22,6 +22,9 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerCommandExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerKillCommand;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRmCommand;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerVolumeCommand;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
@@ -145,6 +148,17 @@ import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.r
* container-executor based on the values set in container-executor.cfg for
* {@code docker.allowed.ro-mounts} and {@code docker.allowed.rw-mounts}.
* </li>
+ * <li>
+ * {@code YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL} allows a user
+ * to request delayed deletion of the Docker containers on a per
+ * container basis. If true, Docker containers will not be removed until
+ * the duration defined by {@code yarn.nodemanager.delete.debug-delay-sec}
+ * has elapsed. Administrators can disable this feature through the
+ * yarn-site property
+ * {@code yarn.nodemanager.runtime.linux.docker.delayed-removal.allowed}.
+ * This feature is disabled by default. When this feature is disabled or set
+ * to false, the container will be removed as soon as it exits.
+ * </li>
* </ul>
*/
@InterfaceAudience.Private
@@ -192,6 +206,9 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
@InterfaceAudience.Private
public static final String ENV_DOCKER_CONTAINER_MOUNTS =
"YARN_CONTAINER_RUNTIME_DOCKER_MOUNTS";
+ @InterfaceAudience.Private
+ public static final String ENV_DOCKER_CONTAINER_DELAYED_REMOVAL =
+ "YARN_CONTAINER_RUNTIME_DOCKER_DELAYED_REMOVAL";
private Configuration conf;
private Context nmContext;
@@ -206,6 +223,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
private int userRemappingUidThreshold;
private int userRemappingGidThreshold;
private Set<String> capabilities;
+ private boolean delayedRemovalAllowed;
/**
* Return whether the given environment variables indicate that the operation
@@ -306,6 +324,10 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
YarnConfiguration.DEFAULT_NM_DOCKER_USER_REMAPPING_GID_THRESHOLD);
capabilities = getDockerCapabilitiesFromConf();
+
+ delayedRemovalAllowed = conf.getBoolean(
+ YarnConfiguration.NM_DOCKER_ALLOW_DELAYED_REMOVAL,
+ YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_DELAYED_REMOVAL);
}
private Set<String> getDockerCapabilitiesFromConf() throws
@@ -833,49 +855,66 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
}
}
+ /**
+ * Signal the docker container.
+ *
+ * Signals are used to check the liveliness of the container as well as to
+ * stop/kill the container. The following outlines the docker container
+ * signal handling.
+ *
+ * <ol>
+ * <li>If the null signal is sent, run kill -0 on the pid. This is used
+ * to check if the container is still alive, which is necessary for
+ * reacquiring containers on NM restart.</li>
+ * <li>If SIGTERM, SIGKILL is sent, attempt to stop and remove the docker
+ * container.</li>
+ * <li>If the docker container exists and is running, execute docker
+ * stop.</li>
+ * <li>If any other signal is sent, signal the container using docker
+ * kill.</li>
+ * </ol>
+ *
+ * @param ctx the {@link ContainerRuntimeContext}.
+ * @throws ContainerExecutionException if the signaling fails.
+ */
@Override
public void signalContainer(ContainerRuntimeContext ctx)
throws ContainerExecutionException {
ContainerExecutor.Signal signal = ctx.getExecutionAttribute(SIGNAL);
-
- PrivilegedOperation privOp = null;
- // Handle liveliness checks, send null signal to pid
- if(ContainerExecutor.Signal.NULL.equals(signal)) {
- privOp = new PrivilegedOperation(
- PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
- privOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
- ctx.getExecutionAttribute(USER),
- Integer.toString(PrivilegedOperation.RunAsUserCommand
- .SIGNAL_CONTAINER.getValue()),
- ctx.getExecutionAttribute(PID),
- Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue()));
-
- // All other signals handled as docker stop
- } else {
- String containerId = ctx.getContainer().getContainerId().toString();
- DockerStopCommand stopCommand = new DockerStopCommand(containerId);
- String commandFile = dockerClient.writeCommandToTempFile(stopCommand,
- containerId);
- privOp = new PrivilegedOperation(
- PrivilegedOperation.OperationType.RUN_DOCKER_CMD);
- privOp.appendArgs(commandFile);
- }
-
- //Some failures here are acceptable. Let the calling executor decide.
- privOp.disableFailureLogging();
-
+ String containerId = ctx.getContainer().getContainerId().toString();
+ Map<String, String> env =
+ ctx.getContainer().getLaunchContext().getEnvironment();
try {
- privilegedOperationExecutor.executePrivilegedOperation(null,
- privOp, null, null, false, false);
- } catch (PrivilegedOperationException e) {
- throw new ContainerExecutionException("Signal container failed", e
- .getExitCode(), e.getOutput(), e.getErrorOutput());
+ if (ContainerExecutor.Signal.NULL.equals(signal)) {
+ executeLivelinessCheck(ctx);
+ } else {
+ if (ContainerExecutor.Signal.KILL.equals(signal)
+ || ContainerExecutor.Signal.TERM.equals(signal)) {
+ handleContainerStop(containerId, env);
+ } else {
+ handleContainerKill(containerId, env, signal);
+ }
+ }
+ } catch (ContainerExecutionException e) {
+ LOG.warn("Signal docker container failed. Exception: ", e);
+ throw new ContainerExecutionException("Signal docker container failed",
+ e.getExitCode(), e.getOutput(), e.getErrorOutput());
}
}
+ /**
+ * Reap the docker container.
+ *
+ * @param ctx the {@link ContainerRuntimeContext}.
+ * @throws ContainerExecutionException if the removal fails.
+ */
@Override
public void reapContainer(ContainerRuntimeContext ctx)
throws ContainerExecutionException {
+ // Clean up the Docker container
+ handleContainerRemove(ctx.getContainer().getContainerId().toString(),
+ ctx.getContainer().getLaunchContext().getEnvironment());
+
// Cleanup volumes when needed.
if (nmContext != null
&& nmContext.getResourcePluginManager().getNameToPlugins() != null) {
@@ -993,4 +1032,83 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
+ "' doesn't match docker image name pattern");
}
}
+
+ private void executeLivelinessCheck(ContainerRuntimeContext ctx)
+ throws ContainerExecutionException {
+ PrivilegedOperation signalOp = new PrivilegedOperation(
+ PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
+ signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
+ ctx.getExecutionAttribute(USER), Integer.toString(
+ PrivilegedOperation.RunAsUserCommand.SIGNAL_CONTAINER.getValue()),
+ ctx.getExecutionAttribute(PID),
+ Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue()));
+ signalOp.disableFailureLogging();
+ try {
+ privilegedOperationExecutor.executePrivilegedOperation(null, signalOp,
+ null, ctx.getContainer().getLaunchContext().getEnvironment(), false,
+ false);
+ } catch (PrivilegedOperationException e) {
+ String msg = "Liveliness check failed for PID: "
+ + ctx.getExecutionAttribute(PID)
+ + ". Container may have already completed.";
+ throw new ContainerExecutionException(msg, e.getExitCode(), e.getOutput(),
+ e.getErrorOutput());
+ }
+ }
+
+ private void handleContainerStop(String containerId, Map<String, String> env)
+ throws ContainerExecutionException {
+ DockerCommandExecutor.DockerContainerStatus containerStatus =
+ DockerCommandExecutor.getContainerStatus(containerId, conf,
+ privilegedOperationExecutor);
+ if (DockerCommandExecutor.isStoppable(containerStatus)) {
+ DockerStopCommand dockerStopCommand = new DockerStopCommand(containerId);
+ DockerCommandExecutor.executeDockerCommand(dockerStopCommand, containerId,
+ env, conf, privilegedOperationExecutor, false);
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "Container status is " + containerStatus.getName()
+ + ", skipping stop - " + containerId);
+ }
+ }
+ }
+
+ private void handleContainerKill(String containerId, Map<String, String> env,
+ ContainerExecutor.Signal signal) throws ContainerExecutionException {
+ DockerCommandExecutor.DockerContainerStatus containerStatus =
+ DockerCommandExecutor.getContainerStatus(containerId, conf,
+ privilegedOperationExecutor);
+ if (DockerCommandExecutor.isKillable(containerStatus)) {
+ DockerKillCommand dockerKillCommand =
+ new DockerKillCommand(containerId).setSignal(signal.name());
+ DockerCommandExecutor.executeDockerCommand(dockerKillCommand, containerId,
+ env, conf, privilegedOperationExecutor, false);
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "Container status is " + containerStatus.getName()
+ + ", skipping kill - " + containerId);
+ }
+ }
+ }
+
+ private void handleContainerRemove(String containerId,
+ Map<String, String> env) throws ContainerExecutionException {
+ String delayedRemoval = env.get(ENV_DOCKER_CONTAINER_DELAYED_REMOVAL);
+ if (delayedRemovalAllowed && delayedRemoval != null
+ && delayedRemoval.equalsIgnoreCase("true")) {
+ LOG.info("Delayed removal requested and allowed, skipping removal - "
+ + containerId);
+ } else {
+ DockerCommandExecutor.DockerContainerStatus containerStatus =
+ DockerCommandExecutor.getContainerStatus(containerId, conf,
+ privilegedOperationExecutor);
+ if (DockerCommandExecutor.isRemovable(containerStatus)) {
+ DockerRmCommand dockerRmCommand = new DockerRmCommand(containerId);
+ DockerCommandExecutor.executeDockerCommand(dockerRmCommand, containerId,
+ env, conf, privilegedOperationExecutor, false);
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommandExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommandExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommandExecutor.java
index 76b53af..fd1812b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommandExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommandExecutor.java
@@ -183,9 +183,46 @@ public final class DockerCommandExecutor {
new DockerInspectCommand(containerId).getContainerStatus();
try {
return DockerCommandExecutor.executeDockerCommand(dockerInspectCommand,
- containerId, null, conf, privilegedOperationExecutor, false);
+ containerId, null, conf, privilegedOperationExecutor, true);
} catch (ContainerExecutionException e) {
throw new ContainerExecutionException(e);
}
}
+
+ /**
+ * Is the container in a stoppable state?
+ *
+ * @param containerStatus the container's {@link DockerContainerStatus}.
+ * @return whether the container is in a stoppable state.
+ */
+ public static boolean isStoppable(DockerContainerStatus containerStatus) {
+ if (containerStatus.equals(DockerContainerStatus.RUNNING)
+ || containerStatus.equals(DockerContainerStatus.RESTARTING)) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Is the container in a killable state?
+ *
+ * @param containerStatus the container's {@link DockerContainerStatus}.
+ * @return whether the container is in a killable state.
+ */
+ public static boolean isKillable(DockerContainerStatus containerStatus) {
+ return isStoppable(containerStatus);
+ }
+
+ /**
+ * Is the container in a removable state?
+ *
+ * @param containerStatus the container's {@link DockerContainerStatus}.
+ * @return whether the container is in a removable state.
+ */
+ public static boolean isRemovable(DockerContainerStatus containerStatus) {
+ return !containerStatus.equals(DockerContainerStatus.NONEXISTENT)
+ && !containerStatus.equals(DockerContainerStatus.UNKNOWN)
+ && !containerStatus.equals(DockerContainerStatus.REMOVING)
+ && !containerStatus.equals(DockerContainerStatus.RUNNING);
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerKillCommand.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerKillCommand.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerKillCommand.java
new file mode 100644
index 0000000..d37370e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerKillCommand.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker;
+
+/**
+ * Encapsulates the docker kill command and its command line arguments.
+ */
+public class DockerKillCommand extends DockerCommand {
+ private static final String KILL_COMMAND = "kill";
+
+ public DockerKillCommand(String containerName) {
+ super(KILL_COMMAND);
+ super.addCommandArguments("name", containerName);
+ }
+
+ /**
+ * Set the signal for the {@link DockerKillCommand}.
+ *
+ * @param signal the signal to send to the container.
+ * @return the {@link DockerKillCommand} with the signal set.
+ */
+ public DockerKillCommand setSignal(String signal) {
+ super.addCommandArguments("signal", signal);
+ return this;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/executor/ContainerReapContext.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/executor/ContainerReapContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/executor/ContainerReapContext.java
new file mode 100644
index 0000000..83aeef8
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/executor/ContainerReapContext.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.nodemanager.executor;
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+
+/**
+ * Encapsulate the details needed to reap a container.
+ */
+public final class ContainerReapContext {
+
+ private final Container container;
+ private final String user;
+
+ /**
+ * Builder for the ContainerReapContext.
+ */
+ public static final class Builder {
+ private Container builderContainer;
+ private String builderUser;
+
+ public Builder() {
+ }
+
+ /**
+ * Set the container within the context.
+ *
+ * @param container the {@link Container}.
+ * @return the Builder with the container set.
+ */
+ public Builder setContainer(Container container) {
+ this.builderContainer = container;
+ return this;
+ }
+
+ /**
+ * Set the user within the context.
+ *
+ * @param user the user.
+ * @return the Builder with the user set.
+ */
+ public Builder setUser(String user) {
+ this.builderUser = user;
+ return this;
+ }
+
+ /**
+ * Builds the context with the attributes set.
+ *
+ * @return the context.
+ */
+ public ContainerReapContext build() {
+ return new ContainerReapContext(this);
+ }
+ }
+
+ private ContainerReapContext(Builder builder) {
+ this.container = builder.builderContainer;
+ this.user = builder.builderUser;
+ }
+
+ /**
+ * Get the container set for the context.
+ *
+ * @return the {@link Container} set in the context.
+ */
+ public Container getContainer() {
+ return container;
+ }
+
+ /**
+ * Get the user set for the context.
+ *
+ * @return the user set in the context.
+ */
+ public String getUser() {
+ return user;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
index 0447b8e..c1977e5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
@@ -19,6 +19,7 @@
#include "configuration.h"
#include "container-executor.h"
#include "utils/docker-util.h"
+#include "utils/path-utils.h"
#include "util.h"
#include "config.h"
@@ -71,6 +72,8 @@ static const char* DEFAULT_BANNED_USERS[] = {"yarn", "mapred", "hdfs", "bin", 0}
static const int DEFAULT_DOCKER_SUPPORT_ENABLED = 0;
static const int DEFAULT_TC_SUPPORT_ENABLED = 0;
+static const char* PROC_PATH = "/proc";
+
//location of traffic control binary
static const char* TC_BIN = "/sbin/tc";
static const char* TC_MODIFY_STATE_OPTS [] = { "-b" , NULL};
@@ -1359,6 +1362,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
char *docker_logs_command = NULL;
char *docker_inspect_command = NULL;
char *docker_rm_command = NULL;
+ char *docker_inspect_exitcode_command = NULL;
int container_file_source =-1;
int cred_file_source = -1;
int BUFFER_SIZE = 4096;
@@ -1371,6 +1375,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
docker_logs_command = (char *) alloc_and_clear_memory(command_size, sizeof(char));
docker_inspect_command = (char *) alloc_and_clear_memory(command_size, sizeof(char));
docker_rm_command = (char *) alloc_and_clear_memory(command_size, sizeof(char));
+ docker_inspect_exitcode_command = (char *) alloc_and_clear_memory(command_size, sizeof(char));
gid_t user_gid = getegid();
uid_t prev_uid = geteuid();
@@ -1421,6 +1426,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
snprintf(docker_command_with_binary, command_size, "%s %s", docker_binary, docker_command);
fprintf(LOGFILE, "Launching docker container...\n");
+ fprintf(LOGFILE, "Docker run command: %s\n", docker_command_with_binary);
FILE* start_docker = popen(docker_command_with_binary, "r");
if (pclose (start_docker) != 0)
{
@@ -1436,9 +1442,11 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
docker_binary, container_id);
fprintf(LOGFILE, "Inspecting docker container...\n");
+ fprintf(LOGFILE, "Docker inspect command: %s\n", docker_inspect_command);
FILE* inspect_docker = popen(docker_inspect_command, "r");
int pid = 0;
int res = fscanf (inspect_docker, "%d", &pid);
+ fprintf(LOGFILE, "pid from docker inspect: %d\n", pid);
if (pclose (inspect_docker) != 0 || res <= 0)
{
fprintf (ERRORFILE,
@@ -1476,17 +1484,45 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
goto cleanup;
}
- snprintf(docker_wait_command, command_size,
- "%s wait %s", docker_binary, container_id);
+ fprintf(LOGFILE, "Waiting for docker container to finish.\n");
+#ifdef __linux
+ size_t command_size = MIN(sysconf(_SC_ARG_MAX), 128*1024);
+ char* proc_pid_path = alloc_and_clear_memory(command_size, sizeof(char));
+ snprintf(proc_pid_path, command_size, "%s/%d", PROC_PATH, pid);
+ while (dir_exists(proc_pid_path) == 0) {
+ sleep(1);
+ }
+ if (dir_exists(proc_pid_path) == -1) {
+ fprintf(ERRORFILE, "Error occurred checking %s\n", proc_pid_path);
+ fflush(ERRORFILE);
+ }
+#else
+ while (kill(pid,0) == 0) {
+ sleep(1);
+ }
+#endif
- fprintf(LOGFILE, "Waiting for docker container to finish...\n");
- FILE* wait_docker = popen(docker_wait_command, "r");
- res = fscanf (wait_docker, "%d", &exit_code);
- if (pclose (wait_docker) != 0 || res <= 0) {
- fprintf (ERRORFILE,
- "Could not attach to docker; is container dead? %s.\n", docker_wait_command);
+ sprintf(docker_inspect_exitcode_command,
+ "%s inspect --format {{.State.ExitCode}} %s",
+ docker_binary, container_id);
+ fprintf(LOGFILE, "Obtaining the exit code...\n");
+ fprintf(LOGFILE, "Docker inspect command: %s\n", docker_inspect_exitcode_command);
+ FILE* inspect_exitcode_docker = popen(docker_inspect_exitcode_command, "r");
+ if(inspect_exitcode_docker == NULL) {
+ fprintf(ERRORFILE, "Done with inspect_exitcode, inspect_exitcode_docker is null\n");
fflush(ERRORFILE);
+ exit_code = -1;
+ goto cleanup;
}
+ res = fscanf (inspect_exitcode_docker, "%d", &exit_code);
+ if (pclose (inspect_exitcode_docker) != 0 || res <= 0) {
+ fprintf (ERRORFILE,
+ "Could not inspect docker to get exitcode: %s.\n", docker_inspect_exitcode_command);
+ fflush(ERRORFILE);
+ exit_code = -1;
+ goto cleanup;
+ }
+ fprintf(LOGFILE, "Exit code from docker inspect: %d\n", exit_code);
if(exit_code != 0) {
fprintf(ERRORFILE, "Docker container exit code was not zero: %d\n",
exit_code);
@@ -1519,19 +1555,6 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
}
}
- fprintf(LOGFILE, "Removing docker container post-exit...\n");
- snprintf(docker_rm_command, command_size,
- "%s rm %s", docker_binary, container_id);
- FILE* rm_docker = popen(docker_rm_command, "w");
- if (pclose (rm_docker) != 0)
- {
- fprintf (ERRORFILE,
- "Could not remove container %s.\n", docker_rm_command);
- fflush(ERRORFILE);
- exit_code = UNABLE_TO_EXECUTE_CONTAINER_SCRIPT;
- goto cleanup;
- }
-
cleanup:
if (exit_code_file != NULL && write_exit_code_file_as_nm(exit_code_file, exit_code) < 0) {
@@ -1539,6 +1562,7 @@ cleanup:
"Could not write exit code to file %s.\n", exit_code_file);
fflush(ERRORFILE);
}
+ fprintf(LOGFILE, "Wrote the exit code %d to %s\n", exit_code, exit_code_file);
// Drop root privileges
if (change_effective_user(prev_uid, user_gid) != 0) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
index a0138d1..b5cb551 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.c
@@ -253,6 +253,8 @@ int get_docker_command(const char *command_file, const struct configuration *con
char *command = get_configuration_value("docker-command", DOCKER_COMMAND_FILE_SECTION, &command_config);
if (strcmp(DOCKER_INSPECT_COMMAND, command) == 0) {
return get_docker_inspect_command(command_file, conf, out, outlen);
+ } else if (strcmp(DOCKER_KILL_COMMAND, command) == 0) {
+ return get_docker_kill_command(command_file, conf, out, outlen);
} else if (strcmp(DOCKER_LOAD_COMMAND, command) == 0) {
return get_docker_load_command(command_file, conf, out, outlen);
} else if (strcmp(DOCKER_PULL_COMMAND, command) == 0) {
@@ -661,6 +663,66 @@ int get_docker_stop_command(const char *command_file, const struct configuration
return BUFFER_TOO_SMALL;
}
+int get_docker_kill_command(const char *command_file, const struct configuration *conf,
+ char *out, const size_t outlen) {
+ int ret = 0;
+ size_t len = 0, i = 0;
+ char *value = NULL;
+ char *container_name = NULL;
+ struct configuration command_config = {0, NULL};
+ ret = read_and_verify_command_file(command_file, DOCKER_KILL_COMMAND, &command_config);
+ if (ret != 0) {
+ return ret;
+ }
+
+ container_name = get_configuration_value("name", DOCKER_COMMAND_FILE_SECTION, &command_config);
+ if (container_name == NULL || validate_container_name(container_name) != 0) {
+ return INVALID_DOCKER_CONTAINER_NAME;
+ }
+
+ memset(out, 0, outlen);
+
+ ret = add_docker_config_param(&command_config, out, outlen);
+ if (ret != 0) {
+ return BUFFER_TOO_SMALL;
+ }
+
+ ret = add_to_buffer(out, outlen, DOCKER_KILL_COMMAND);
+ if (ret == 0) {
+ value = get_configuration_value("signal", DOCKER_COMMAND_FILE_SECTION, &command_config);
+ if (value != NULL) {
+ len = strlen(value);
+ for (i = 0; i < len; ++i) {
+ if (isupper(value[i]) == 0) {
+ fprintf(ERRORFILE, "Value for signal contains non-uppercase characters '%s'\n", value);
+ free(container_name);
+ memset(out, 0, outlen);
+ return INVALID_DOCKER_KILL_COMMAND;
+ }
+ }
+ ret = add_to_buffer(out, outlen, " --signal=");
+ if (ret == 0) {
+ ret = add_to_buffer(out, outlen, value);
+ }
+ if (ret != 0) {
+ free(container_name);
+ return BUFFER_TOO_SMALL;
+ }
+ }
+ ret = add_to_buffer(out, outlen, " ");
+ if (ret == 0) {
+ ret = add_to_buffer(out, outlen, container_name);
+ }
+ free(container_name);
+ if (ret != 0) {
+ return BUFFER_TOO_SMALL;
+ }
+ return 0;
+ }
+ free(container_name);
+ return BUFFER_TOO_SMALL;
+}
+
/*
 * Add the detach flag ("-d ") to the output buffer when the command file
 * sets the "detach" param. Exact behavior (including the meaning of the 0
 * argument) is delegated to add_param_to_command — see its definition.
 */
static int detach_container(const struct configuration *command_config, char *out, const size_t outlen) {
  return add_param_to_command(command_config, "detach", "-d ", 0, out, outlen);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
index 9c42abe..f98800c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/docker-util.h
@@ -30,6 +30,7 @@
#define DOCKER_RM_COMMAND "rm"
#define DOCKER_RUN_COMMAND "run"
#define DOCKER_STOP_COMMAND "stop"
+#define DOCKER_KILL_COMMAND "kill"
#define DOCKER_VOLUME_COMMAND "volume"
@@ -51,6 +52,7 @@ enum docker_error_codes {
MOUNT_ACCESS_ERROR,
INVALID_DOCKER_DEVICE,
INVALID_DOCKER_STOP_COMMAND,
+ INVALID_DOCKER_KILL_COMMAND,
INVALID_DOCKER_VOLUME_DRIVER,
INVALID_DOCKER_VOLUME_NAME,
INVALID_DOCKER_VOLUME_COMMAND
@@ -135,6 +137,16 @@ int get_docker_run_command(const char* command_file, const struct configuration*
int get_docker_stop_command(const char* command_file, const struct configuration* conf, char *out, const size_t outlen);
/**
+ * Get the Docker kill command line string. The function will verify that the params file is meant for the kill command.
+ * @param command_file File containing the params for the Docker kill command
+ * @param conf Configuration struct containing the container-executor.cfg details
+ * @param out Buffer to fill with the kill command
+ * @param outlen Size of the output buffer
+ * @return Return code with 0 indicating success and non-zero codes indicating error
+ */
+int get_docker_kill_command(const char* command_file, const struct configuration* conf, char *out, const size_t outlen);
+
+/**
* Get the Docker volume command line string. The function will verify that the
* params file is meant for the volume command.
* @param command_file File containing the params for the Docker volume command
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.c
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.c
index dea656b..d8219bb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.c
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.c
@@ -18,6 +18,8 @@
#include "util.h"
+#include <dirent.h>
+#include <errno.h>
#include <strings.h>
#include <string.h>
#include <stdio.h>
@@ -49,4 +51,16 @@ int verify_path_safety(const char* path) {
free(dup);
return succeeded;
+}
+
+int dir_exists(const char* path) {
+ DIR* dir = opendir(path);
+ if (dir) {
+ closedir(dir);
+ return 0;
+ } else if (ENOENT == errno) {
+ return 1;
+ } else {
+ return -1;
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.h
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.h
index a42f936..041ec95 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.h
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/utils/path-utils.h
@@ -32,4 +32,11 @@
*/
int verify_path_safety(const char* path);
+/*
+ * Verify that a given directory exists.
+ * return 0 if the directory exists, 1 if the directory does not exist, and -1
+ * for all other errors.
+ */
+int dir_exists(const char* path);
+
#endif
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test-path-utils.cc
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test-path-utils.cc b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test-path-utils.cc
index a24c0c7..05e17cd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test-path-utils.cc
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test-path-utils.cc
@@ -64,4 +64,16 @@
ASSERT_TRUE(flag) << "Should succeeded\n";
}
// Verify dir_exists() on a path that should not exist and on one that must.
TEST_F(TestPathUtils, test_dir_exists) {
  // Bogus path: dir_exists contract allows 1 (ENOENT) or -1 (other error),
  // so only assert non-zero here.
  const char* input = "/non/existent/dir";
  int flag = dir_exists(input);
  std::cout << "Testing input=" << input << "\n";
  ASSERT_NE(flag, 0) << "Should failed\n";

  // The root directory always exists: expect exactly 0.
  input = "/";
  flag = dir_exists(input);
  std::cout << "Testing input=" << input << "\n";
  ASSERT_EQ(flag, 0) << "Should succeeded\n";
}
+
} // namespace ContainerExecutor
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
index 0c1c4bf..416bf38 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/utils/test_docker_util.cc
@@ -312,6 +312,32 @@ namespace ContainerExecutor {
run_docker_command_test(file_cmd_vec, bad_file_cmd_vec, get_docker_stop_command);
}
// Exercise get_docker_kill_command with both well-formed command files
// (expected generated command line) and malformed ones (expected error code).
TEST_F(TestDockerUtil, test_docker_kill) {
  // Good vectors: (command-file contents, expected command string).
  std::vector<std::pair<std::string, std::string> > file_cmd_vec;
  // Plain kill with no signal: no --signal flag appears in the output.
  file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
      "[docker-command-execution]\n docker-command=kill\n name=container_e1_12312_11111_02_000001",
      "kill container_e1_12312_11111_02_000001"));
  // Uppercase signal name is accepted and rendered as --signal=<NAME>.
  file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
      "[docker-command-execution]\n docker-command=kill\n name=container_e1_12312_11111_02_000001\nsignal=SIGQUIT",
      "kill --signal=SIGQUIT container_e1_12312_11111_02_000001"));

  // Bad vectors: (command-file contents, expected error code).
  std::vector<std::pair<std::string, int> > bad_file_cmd_vec;
  // Wrong docker-command for this builder.
  bad_file_cmd_vec.push_back(std::make_pair<std::string, int>(
      "[docker-command-execution]\n docker-command=run\n name=container_e1_12312_11111_02_000001",
      static_cast<int>(INCORRECT_COMMAND)));
  // Missing the [docker-command-execution] section header.
  bad_file_cmd_vec.push_back(std::make_pair<std::string, int>(
      "docker-command=kill\n name=ctr-id", static_cast<int>(INCORRECT_COMMAND)));
  // Empty and absent container names are both rejected.
  bad_file_cmd_vec.push_back(std::make_pair<std::string, int>(
      "[docker-command-execution]\n docker-command=kill\n name=", static_cast<int>(INVALID_DOCKER_CONTAINER_NAME)));
  bad_file_cmd_vec.push_back(std::make_pair<std::string, int>(
      "[docker-command-execution]\n docker-command=kill", static_cast<int>(INVALID_DOCKER_CONTAINER_NAME)));
  // Non-uppercase signal (also shell metacharacters) is rejected.
  bad_file_cmd_vec.push_back(std::make_pair<std::string, int>(
      "[docker-command-execution]\n docker-command=kill\n name=container_e1_12312_11111_02_000001\n signal=foo | bar",
      static_cast<int>(INVALID_DOCKER_KILL_COMMAND)));

  run_docker_command_test(file_cmd_vec, bad_file_cmd_vec, get_docker_kill_command);
}
+
TEST_F(TestDockerUtil, test_detach_container) {
std::vector<std::pair<std::string, std::string> > file_cmd_vec;
file_cmd_vec.push_back(std::make_pair<std::string, std::string>(
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto
index 7212953..4eee9b4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto
@@ -42,6 +42,7 @@ message DeletionServiceDeleteTaskProto {
repeated string basedirs = 5;
repeated int32 successorIds = 6;
optional string taskType = 7;
+ optional string dockerContainerId = 8;
}
message LocalizedResourceProto {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java
index 396c8f4..3bcdc87 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerExecutor.java
@@ -25,7 +25,8 @@ import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
-
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils;
import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
import org.junit.Assert;
@@ -33,6 +34,7 @@ import org.junit.Test;
import static org.apache.hadoop.test.PlatformAssumptions.assumeWindows;
import static org.junit.Assert.*;
+import static org.mockito.Mockito.mock;
@SuppressWarnings("deprecation")
public class TestContainerExecutor {
@@ -158,4 +160,12 @@ public class TestContainerExecutor {
expected[6] = String.valueOf(cpuRate);
Assert.assertEquals(Arrays.toString(expected), Arrays.toString(command));
}
+
+ @Test
+ public void testReapContainer() throws Exception {
+ Container container = mock(Container.class);
+ ContainerReapContext.Builder builder = new ContainerReapContext.Builder();
+ builder.setContainer(container).setUser("foo");
+ assertTrue(containerExecutor.reapContainer(builder.build()));
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d65dbe0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
index dcec4c3..31be858 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
@@ -26,7 +26,11 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
+
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -649,6 +653,28 @@ public class TestLinuxContainerExecutor {
TestResourceHandler.postExecContainers.contains(cid));
}
  @Test
  public void testRemoveDockerContainer() throws Exception {
    // Build a realistic container id string (app 12345_67890, attempt 54321,
    // container 9876) to pass through removeDockerContainer.
    ApplicationId appId = ApplicationId.newInstance(12345, 67890);
    ApplicationAttemptId attemptId =
        ApplicationAttemptId.newInstance(appId, 54321);
    String cid = ContainerId.newContainerId(attemptId, 9876).toString();
    // NOTE(review): the executor is a Mockito mock, so this only verifies the
    // call the test itself just made — no production logic in
    // LinuxContainerExecutor.removeDockerContainer is exercised. Consider a
    // spy on a real executor or asserting on the DeletionService interaction.
    LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class);
    lce.removeDockerContainer(cid);
    verify(lce, times(1)).removeDockerContainer(cid);
  }
+
  @Test
  public void testReapContainer() throws Exception {
    // Build a reap context for a mocked container with a fixed user.
    Container container = mock(Container.class);
    // NOTE(review): as with testRemoveDockerContainer above, the executor is a
    // mock, so verify() only confirms the invocation this test just performed;
    // the real reapContainer implementation is never run.
    LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class);
    ContainerReapContext.Builder builder = new ContainerReapContext.Builder();
    builder.setContainer(container).setUser("foo");
    ContainerReapContext ctx = builder.build();
    lce.reapContainer(ctx);
    verify(lce, times(1)).reapContainer(ctx);
  }
+
private static class TestResourceHandler implements LCEResourcesHandler {
static Set<ContainerId> postExecContainers = new HashSet<ContainerId>();
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org