You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by jl...@apache.org on 2014/11/13 17:12:10 UTC
hadoop git commit: YARN-2846. Incorrect exit code persisted for running
containers when reacquireContainer() is interrupted by a NodeManager restart.
Contributed by Junping Du
Repository: hadoop
Updated Branches:
refs/heads/trunk 177e8090f -> 33ea5ae92
YARN-2846. Incorrect exit code persisted for running containers when reacquireContainer() is interrupted by a NodeManager restart. Contributed by Junping Du
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/33ea5ae9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/33ea5ae9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/33ea5ae9
Branch: refs/heads/trunk
Commit: 33ea5ae92b9dd3abace104903d9a94d17dd75af5
Parents: 177e809
Author: Jason Lowe <jl...@apache.org>
Authored: Thu Nov 13 16:11:04 2014 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Thu Nov 13 16:11:04 2014 +0000
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 4 ++++
.../server/nodemanager/ContainerExecutor.java | 21 +++++++-------------
.../nodemanager/LinuxContainerExecutor.java | 2 +-
.../launcher/RecoveredContainerLaunch.java | 20 ++++++++++++-------
4 files changed, 25 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/33ea5ae9/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 01dc246..326d33c 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -955,6 +955,10 @@ Release 2.6.0 - 2014-11-15
YARN-2794. Fixed log messages about distributing system-credentials. (Jian He via
zjshen)
+ YARN-2846. Incorrect persist exit code for running containers in
+ reacquireContainer() that interrupted by NodeManager restart. (Junping Du
+ via jlowe)
+
Release 2.5.2 - 2014-11-10
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/33ea5ae9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
index 8133413..327f882 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
@@ -159,9 +159,10 @@ public abstract class ContainerExecutor implements Configurable {
* @param containerId The ID of the container to reacquire
* @return The exit code of the pre-existing container
* @throws IOException
+ * @throws InterruptedException
*/
public int reacquireContainer(String user, ContainerId containerId)
- throws IOException {
+ throws IOException, InterruptedException {
Path pidPath = getPidFilePath(containerId);
if (pidPath == null) {
LOG.warn(containerId + " is not active, returning terminated error");
@@ -175,13 +176,8 @@ public abstract class ContainerExecutor implements Configurable {
}
LOG.info("Reacquiring " + containerId + " with pid " + pid);
- try {
- while(isContainerProcessAlive(user, pid)) {
- Thread.sleep(1000);
- }
- } catch (InterruptedException e) {
- throw new IOException("Interrupted while waiting for process " + pid
- + " to exit", e);
+ while(isContainerProcessAlive(user, pid)) {
+ Thread.sleep(1000);
}
// wait for exit code file to appear
@@ -194,12 +190,9 @@ public abstract class ContainerExecutor implements Configurable {
LOG.info(containerId + " was deactivated");
return ExitCode.TERMINATED.getExitCode();
}
- try {
- Thread.sleep(sleepMsec);
- } catch (InterruptedException e) {
- throw new IOException(
- "Interrupted while waiting for exit code from " + containerId, e);
- }
+
+ Thread.sleep(sleepMsec);
+
msecLeft -= sleepMsec;
}
if (msecLeft < 0) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/33ea5ae9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 60cb058..4db4ef2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -347,7 +347,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
@Override
public int reacquireContainer(String user, ContainerId containerId)
- throws IOException {
+ throws IOException, InterruptedException {
try {
return super.reacquireContainer(user, containerId);
} finally {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/33ea5ae9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
index 446695a..03a39aa 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
@@ -73,6 +73,7 @@ public class RecoveredContainerLaunch extends ContainerLaunch {
dispatcher.getEventHandler().handle(new ContainerEvent(containerId,
ContainerEventType.CONTAINER_LAUNCHED));
+ boolean notInterrupted = true;
try {
File pidFile = locatePidFile(appIdStr, containerIdStr);
if (pidFile != null) {
@@ -85,14 +86,19 @@ public class RecoveredContainerLaunch extends ContainerLaunch {
}
} catch (IOException e) {
LOG.error("Unable to recover container " + containerIdStr, e);
+ } catch (InterruptedException e) {
+ LOG.warn("Interrupted while waiting for exit code from " + containerId);
+ notInterrupted = false;
} finally {
- this.completed.set(true);
- exec.deactivateContainer(containerId);
- try {
- getContext().getNMStateStore().storeContainerCompleted(containerId,
- retCode);
- } catch (IOException e) {
- LOG.error("Unable to set exit code for container " + containerId);
+ if (notInterrupted) {
+ this.completed.set(true);
+ exec.deactivateContainer(containerId);
+ try {
+ getContext().getNMStateStore().storeContainerCompleted(containerId,
+ retCode);
+ } catch (IOException e) {
+ LOG.error("Unable to set exit code for container " + containerId);
+ }
}
}