You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by as...@apache.org on 2017/09/12 20:09:03 UTC
hadoop git commit: YARN-6059. Update paused container state in the NM
state store. (Hitesh Sharma via asuresh)
Repository: hadoop
Updated Branches:
refs/heads/YARN-5972 026f877b9 -> d5528e0a5
YARN-6059. Update paused container state in the NM state store. (Hitesh Sharma via asuresh)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d5528e0a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d5528e0a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d5528e0a
Branch: refs/heads/YARN-5972
Commit: d5528e0a5b8fabf2e8c7a5554f781655b0789549
Parents: 026f877
Author: Arun Suresh <as...@apache.org>
Authored: Tue Sep 12 12:22:00 2017 -0700
Committer: Arun Suresh <as...@apache.org>
Committed: Tue Sep 12 12:22:00 2017 -0700
----------------------------------------------------------------------
.../container/ContainerImpl.java | 16 ++-
.../launcher/ContainerLaunch.java | 28 ++++-
.../launcher/ContainersLauncher.java | 10 ++
.../launcher/ContainersLauncherEventType.java | 3 +-
.../launcher/RecoverPausedContainerLaunch.java | 124 +++++++++++++++++++
.../launcher/RecoveredContainerLaunch.java | 3 +-
.../recovery/NMLeveldbStateStoreService.java | 42 ++++++-
.../recovery/NMNullStateStoreService.java | 9 ++
.../recovery/NMStateStoreService.java | 26 +++-
.../recovery/NMMemoryStateStoreService.java | 13 ++
.../TestNMLeveldbStateStoreService.java | 17 +++
11 files changed, 273 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index 95ebfd5..9b9c47f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -836,10 +836,18 @@ public class ContainerImpl implements Container {
@SuppressWarnings("unchecked") // dispatcher not typed
private void sendScheduleEvent() {
- dispatcher.getEventHandler().handle(
- new ContainerSchedulerEvent(this,
- ContainerSchedulerEventType.SCHEDULE_CONTAINER)
- );
+ if (recoveredStatus == RecoveredContainerStatus.PAUSED) {
+ // Recovery is not supported for paused container so we raise the
+ // launch event which will proceed to kill the paused container instead
+ // of raising the schedule event.
+ ContainersLauncherEventType launcherEvent;
+ launcherEvent = ContainersLauncherEventType.RECOVER_PAUSED_CONTAINER;
+ dispatcher.getEventHandler()
+ .handle(new ContainersLauncherEvent(this, launcherEvent));
+ } else {
+ dispatcher.getEventHandler().handle(new ContainerSchedulerEvent(this,
+ ContainerSchedulerEventType.SCHEDULE_CONTAINER));
+ }
}
@SuppressWarnings("unchecked") // dispatcher not typed
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
index bc6323a..21b5c06 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
@@ -837,6 +837,14 @@ public class ContainerLaunch implements Callable<Integer> {
dispatcher.getEventHandler().handle(new ContainerEvent(
containerId,
ContainerEventType.CONTAINER_PAUSED));
+
+ try {
+ this.context.getNMStateStore().storeContainerPaused(
+ container.getContainerId());
+ } catch (IOException e) {
+ LOG.warn("Could not store container [" + container.getContainerId()
+ + "] state. The Container has been paused.", e);
+ }
} catch (Exception e) {
String message =
"Exception when trying to pause container " + containerIdStr
@@ -873,12 +881,20 @@ public class ContainerLaunch implements Callable<Integer> {
// If the container has already started
try {
- exec.resumeContainer(container);
- // ResumeContainer is a blocking call. We are here almost means the
- // container is resumed, so send out the event.
- dispatcher.getEventHandler().handle(new ContainerEvent(
- containerId,
- ContainerEventType.CONTAINER_RESUMED));
+ exec.resumeContainer(container);
+ // ResumeContainer is a blocking call. We are here almost means the
+ // container is resumed, so send out the event.
+ dispatcher.getEventHandler().handle(new ContainerEvent(
+ containerId,
+ ContainerEventType.CONTAINER_RESUMED));
+
+ try {
+ this.context.getNMStateStore().removeContainerPaused(
+ container.getContainerId());
+ } catch (IOException e) {
+ LOG.warn("Could not store container [" + container.getContainerId()
+ + "] state. The Container has been resumed.", e);
+ }
} catch (Exception e) {
String message =
"Exception when trying to resume container " + containerIdStr
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java
index ca69712..9f6ef74 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java
@@ -139,6 +139,16 @@ public class ContainersLauncher extends AbstractService
containerLauncher.submit(launch);
running.put(containerId, launch);
break;
+ case RECOVER_PAUSED_CONTAINER:
+ // Recovery for paused containers is not supported, thus here
+ // we locate any paused containers, and terminate them.
+ app = context.getApplications().get(
+ containerId.getApplicationAttemptId().getApplicationId());
+ launch = new RecoverPausedContainerLaunch(context, getConfig(),
+ dispatcher, exec, app, event.getContainer(), dirsHandler,
+ containerManager);
+ containerLauncher.submit(launch);
+ break;
case CLEANUP_CONTAINER:
case CLEANUP_CONTAINER_FOR_REINIT:
ContainerLaunch launcher = running.remove(containerId);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java
index 1054e06..847ee34 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java
@@ -26,6 +26,7 @@ public enum ContainersLauncherEventType {
CLEANUP_CONTAINER_FOR_REINIT, // The process(grp) itself.
SIGNAL_CONTAINER,
PAUSE_CONTAINER,
- RESUME_CONTAINER
+ RESUME_CONTAINER,
+ RECOVER_PAUSED_CONTAINER
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java
new file mode 100644
index 0000000..14cab9a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.*;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+
+/**
+ * This is a ContainerLaunch which has been recovered after an NM restart for
+ * pause containers (for rolling upgrades)
+ */
+public class RecoverPausedContainerLaunch extends ContainerLaunch {
+
+ private static final Log LOG = LogFactory.getLog(
+ RecoveredContainerLaunch.class);
+
+ public RecoverPausedContainerLaunch(Context context,
+ Configuration configuration, Dispatcher dispatcher,
+ ContainerExecutor exec, Application app, Container container,
+ LocalDirsHandlerService dirsHandler,
+ ContainerManagerImpl containerManager) {
+ super(context, configuration, dispatcher, exec, app, container, dirsHandler,
+ containerManager);
+ }
+
+ /**
+ * Cleanup the paused container by issuing a kill on it.
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ public Integer call() {
+ int retCode = ContainerExecutor.ExitCode.LOST.getExitCode();
+ ContainerId containerId = container.getContainerId();
+ String appIdStr =
+ containerId.getApplicationAttemptId().getApplicationId().toString();
+ String containerIdStr = containerId.toString();
+
+ boolean notInterrupted = true;
+ try {
+ File pidFile = locatePidFile(appIdStr, containerIdStr);
+ if (pidFile != null) {
+ String pidPathStr = pidFile.getPath();
+ pidFilePath = new Path(pidPathStr);
+ exec.activateContainer(containerId, pidFilePath);
+ exec.signalContainer(new ContainerSignalContext.Builder()
+ .setContainer(container)
+ .setUser(container.getUser())
+ .setSignal(ContainerExecutor.Signal.KILL)
+ .build());
+ } else {
+ LOG.warn("Unable to locate pid file for container " + containerIdStr);
+ }
+
+ } catch (InterruptedIOException e) {
+ LOG.warn("Interrupted while waiting for exit code from " + containerId);
+ notInterrupted = false;
+ } catch (IOException e) {
+ LOG.error("Unable to kill the paused container " + containerIdStr, e);
+ } finally {
+ if (notInterrupted) {
+ this.completed.set(true);
+ exec.deactivateContainer(containerId);
+ try {
+ getContext().getNMStateStore()
+ .storeContainerCompleted(containerId, retCode);
+ } catch (IOException e) {
+ LOG.error("Unable to set exit code for container " + containerId);
+ }
+ }
+ }
+
+ LOG.warn("Recovered container exited with a non-zero exit code "
+ + retCode);
+ this.dispatcher.getEventHandler().handle(new ContainerExitEvent(
+ containerId,
+ ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, retCode,
+ "Container exited with a non-zero exit code " + retCode));
+
+ return retCode;
+ }
+
+ private File locatePidFile(String appIdStr, String containerIdStr) {
+ String pidSubpath= getPidFileSubpath(appIdStr, containerIdStr);
+ for (String dir : getContext().getLocalDirsHandler().
+ getLocalDirsForRead()) {
+ File pidFile = new File(dir, pidSubpath);
+ if (pidFile.exists()) {
+ return pidFile;
+ }
+ }
+ return null;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
index 2eba0df..17ddd77 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
@@ -40,10 +40,9 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReacquisitionContext;
-
/**
* This is a ContainerLaunch which has been recovered after an NM restart (for
- * rolling upgrades)
+ * rolling upgrades).
*/
public class RecoveredContainerLaunch extends ContainerLaunch {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
index db931f8..2f9c0a7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
@@ -119,6 +119,7 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
private static final String CONTAINER_DIAGS_KEY_SUFFIX = "/diagnostics";
private static final String CONTAINER_LAUNCHED_KEY_SUFFIX = "/launched";
private static final String CONTAINER_QUEUED_KEY_SUFFIX = "/queued";
+ private static final String CONTAINER_PAUSED_KEY_SUFFIX = "/paused";
private static final String CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX =
"/resourceChanged";
private static final String CONTAINER_KILLED_KEY_SUFFIX = "/killed";
@@ -272,9 +273,16 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
if (rcs.status == RecoveredContainerStatus.REQUESTED) {
rcs.status = RecoveredContainerStatus.QUEUED;
}
+ } else if (suffix.equals(CONTAINER_PAUSED_KEY_SUFFIX)) {
+ if ((rcs.status == RecoveredContainerStatus.LAUNCHED)
+ ||(rcs.status == RecoveredContainerStatus.QUEUED)
+ ||(rcs.status == RecoveredContainerStatus.REQUESTED)) {
+ rcs.status = RecoveredContainerStatus.PAUSED;
+ }
} else if (suffix.equals(CONTAINER_LAUNCHED_KEY_SUFFIX)) {
if ((rcs.status == RecoveredContainerStatus.REQUESTED)
- || (rcs.status == RecoveredContainerStatus.QUEUED)) {
+ || (rcs.status == RecoveredContainerStatus.QUEUED)
+ ||(rcs.status == RecoveredContainerStatus.PAUSED)) {
rcs.status = RecoveredContainerStatus.LAUNCHED;
}
} else if (suffix.equals(CONTAINER_KILLED_KEY_SUFFIX)) {
@@ -367,6 +375,37 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
}
@Override
+ public void storeContainerPaused(ContainerId containerId) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("storeContainerPaused: containerId=" + containerId);
+ }
+
+ String key = CONTAINERS_KEY_PREFIX + containerId.toString()
+ + CONTAINER_PAUSED_KEY_SUFFIX;
+ try {
+ db.put(bytes(key), EMPTY_VALUE);
+ } catch (DBException e) {
+ throw new IOException(e);
+ }
+ }
+
+ @Override
+ public void removeContainerPaused(ContainerId containerId)
+ throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("removeContainerPaused: containerId=" + containerId);
+ }
+
+ String key = CONTAINERS_KEY_PREFIX + containerId.toString()
+ + CONTAINER_PAUSED_KEY_SUFFIX;
+ try {
+ db.delete(bytes(key));
+ } catch (DBException e) {
+ throw new IOException(e);
+ }
+ }
+
+ @Override
public void storeContainerDiagnostics(ContainerId containerId,
StringBuilder diagnostics) throws IOException {
if (LOG.isDebugEnabled()) {
@@ -510,6 +549,7 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
batch.delete(bytes(keyPrefix + CONTAINER_DIAGS_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_LAUNCHED_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_QUEUED_KEY_SUFFIX));
+ batch.delete(bytes(keyPrefix + CONTAINER_PAUSED_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_KILLED_KEY_SUFFIX));
batch.delete(bytes(keyPrefix + CONTAINER_EXIT_CODE_KEY_SUFFIX));
List<String> unknownKeysForContainer = containerUnknownKeySuffixes
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
index dc1cece..d1d0696 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
@@ -80,6 +80,15 @@ public class NMNullStateStoreService extends NMStateStoreService {
}
@Override
+ public void storeContainerPaused(ContainerId containerId) throws IOException {
+ }
+
+ @Override
+ public void removeContainerPaused(ContainerId containerId)
+ throws IOException {
+ }
+
+ @Override
public void storeContainerDiagnostics(ContainerId containerId,
StringBuilder diagnostics) throws IOException {
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
index 62a2b9f..999d2d9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
@@ -73,7 +73,8 @@ public abstract class NMStateStoreService extends AbstractService {
REQUESTED,
QUEUED,
LAUNCHED,
- COMPLETED
+ COMPLETED,
+ PAUSED
}
public static class RecoveredContainerState {
@@ -349,9 +350,9 @@ public abstract class NMStateStoreService extends AbstractService {
}
/**
- * Load the state of applications
- * @return recovered state for applications
- * @throws IOException
+ * Load the state of applications.
+ * @return recovered state for applications.
+ * @throws IOException IO Exception.
*/
public abstract RecoveredApplicationsState loadApplicationsState()
throws IOException;
@@ -403,6 +404,23 @@ public abstract class NMStateStoreService extends AbstractService {
throws IOException;
/**
+ * Record that a container has been paused at the NM.
+ * @param containerId the container ID.
+ * @throws IOException IO Exception.
+ */
+ public abstract void storeContainerPaused(ContainerId containerId)
+ throws IOException;
+
+ /**
+ * Record that a container has been resumed at the NM by removing the
+ * fact that it has be paused.
+ * @param containerId the container ID.
+ * @throws IOException IO Exception.
+ */
+ public abstract void removeContainerPaused(ContainerId containerId)
+ throws IOException;
+
+ /**
* Record that a container has been launched
* @param containerId the container ID
* @throws IOException
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
index 6d6875d..59a225a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
@@ -145,6 +145,19 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
}
@Override
+ public void storeContainerPaused(ContainerId containerId) throws IOException {
+ RecoveredContainerState rcs = getRecoveredContainerState(containerId);
+ rcs.status = RecoveredContainerStatus.PAUSED;
+ }
+
+ @Override
+ public void removeContainerPaused(ContainerId containerId)
+ throws IOException {
+ RecoveredContainerState rcs = getRecoveredContainerState(containerId);
+ rcs.status = RecoveredContainerStatus.LAUNCHED;
+ }
+
+ @Override
public synchronized void storeContainerDiagnostics(ContainerId containerId,
StringBuilder diagnostics) throws IOException {
RecoveredContainerState rcs = getRecoveredContainerState(containerId);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d5528e0a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
index b76f1ff..8c13356 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
@@ -289,6 +289,23 @@ public class TestNMLeveldbStateStoreService {
assertEquals(containerReq, rcs.getStartRequest());
assertEquals(diags.toString(), rcs.getDiagnostics());
+ // pause the container, and verify recovered
+ stateStore.storeContainerPaused(containerId);
+ restartStateStore();
+ recoveredContainers = stateStore.loadContainersState();
+ assertEquals(1, recoveredContainers.size());
+ rcs = recoveredContainers.get(0);
+ assertEquals(RecoveredContainerStatus.PAUSED, rcs.getStatus());
+ assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode());
+ assertEquals(false, rcs.getKilled());
+ assertEquals(containerReq, rcs.getStartRequest());
+
+ // Resume the container
+ stateStore.removeContainerPaused(containerId);
+ restartStateStore();
+ recoveredContainers = stateStore.loadContainersState();
+ assertEquals(1, recoveredContainers.size());
+
// increase the container size, and verify recovered
stateStore.storeContainerResourceChanged(containerId, 2,
Resource.newInstance(2468, 4));
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org