You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2014/08/27 19:37:07 UTC

[07/29] git commit: YARN-2434. RM should not recover containers from previously failed attempt when AM restart is not enabled. Contributed by Jian He

YARN-2434. RM should not recover containers from previously failed attempt when AM restart is not enabled. Contributed by Jian He


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619614 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4236c660
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4236c660
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4236c660

Branch: refs/heads/HDFS-6584
Commit: 4236c6600eda9cdda708d02f3a5a3fe31228f70c
Parents: da4ba50
Author: Jason Darrell Lowe <jl...@apache.org>
Authored: Thu Aug 21 22:41:34 2014 +0000
Committer: Jason Darrell Lowe <jl...@apache.org>
Committed: Thu Aug 21 22:41:34 2014 +0000

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                        |  3 +++
 .../scheduler/AbstractYarnScheduler.java               | 13 +++++++++++++
 .../resourcemanager/TestWorkPreservingRMRestart.java   | 13 +++++++++++++
 3 files changed, 29 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/4236c660/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index ed162ba..df0a29d 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -231,6 +231,9 @@ Release 2.6.0 - UNRELEASED
     YARN-2424. LCE should support non-cgroups, non-secure mode (Chris Douglas 
     via aw)
 
+    YARN-2434. RM should not recover containers from previously failed attempt
+    when AM restart is not enabled (Jian He via jlowe)
+
 Release 2.5.0 - 2014-08-11
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4236c660/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
index 72ee7db..ab56bb9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
@@ -273,6 +273,19 @@ public abstract class AbstractYarnScheduler
       SchedulerApplicationAttempt schedulerAttempt =
           schedulerApp.getCurrentAppAttempt();
 
+      if (!rmApp.getApplicationSubmissionContext()
+        .getKeepContainersAcrossApplicationAttempts()) {
+        // Do not recover containers for a stopped attempt or a previous attempt.
+        if (schedulerAttempt.isStopped()
+            || !schedulerAttempt.getApplicationAttemptId().equals(
+              container.getContainerId().getApplicationAttemptId())) {
+          LOG.info("Skip recovering container " + container
+              + " for already stopped attempt.");
+          killOrphanContainerOnNode(nm, container);
+          continue;
+        }
+      }
+
       // create container
       RMContainer rmContainer = recoverAndCreateContainer(container, nm);
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4236c660/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
index df64d4c..d6af0d7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
@@ -513,6 +513,19 @@ public class TestWorkPreservingRMRestart {
     // just-recovered containers.
     assertNull(scheduler.getRMContainer(runningContainer.getContainerId()));
     assertNull(scheduler.getRMContainer(completedContainer.getContainerId()));
+
+    rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
+
+    MockNM nm2 =
+        new MockNM("127.1.1.1:4321", 8192, rm2.getResourceTrackerService());
+    NMContainerStatus previousAttemptContainer =
+        TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 4,
+          ContainerState.RUNNING);
+    nm2.registerNode(Arrays.asList(previousAttemptContainer), null);
+    // Wait for the RM to settle down after processing the recovered containers.
+    Thread.sleep(3000);
+    // Check that containers from the previous failed attempt are not recovered.
+    assertNull(scheduler.getRMContainer(previousAttemptContainer.getContainerId()));
   }
 
   // Apps already completed before RM restart. Restarted RM scheduler should not