You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by jl...@apache.org on 2014/08/22 00:45:36 UTC
svn commit: r1619616 - in
/hadoop/common/branches/branch-2/hadoop-yarn-project: ./
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/
hadoop-yarn/hadoop-yarn-server/h...
Author: jlowe
Date: Thu Aug 21 22:45:35 2014
New Revision: 1619616
URL: http://svn.apache.org/r1619616
Log:
svn merge -c 1619614 FIXES: YARN-2434. RM should not recover containers from previously failed attempt when AM restart is not enabled. Contributed by Jian He
Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1619616&r1=1619615&r2=1619616&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Thu Aug 21 22:45:35 2014
@@ -211,6 +211,9 @@ Release 2.6.0 - UNRELEASED
YARN-2424. LCE should support non-cgroups, non-secure mode (Chris Douglas
via aw)
+ YARN-2434. RM should not recover containers from previously failed attempt
+ when AM restart is not enabled (Jian He via jlowe)
+
Release 2.5.0 - 2014-08-11
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java?rev=1619616&r1=1619615&r2=1619616&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java Thu Aug 21 22:45:35 2014
@@ -273,6 +273,19 @@ public abstract class AbstractYarnSchedu
SchedulerApplicationAttempt schedulerAttempt =
schedulerApp.getCurrentAppAttempt();
+ if (!rmApp.getApplicationSubmissionContext()
+ .getKeepContainersAcrossApplicationAttempts()) {
+ // Do not recover containers for stopped attempt or previous attempt.
+ if (schedulerAttempt.isStopped()
+ || !schedulerAttempt.getApplicationAttemptId().equals(
+ container.getContainerId().getApplicationAttemptId())) {
+ LOG.info("Skip recovering container " + container
+ + " for already stopped attempt.");
+ killOrphanContainerOnNode(nm, container);
+ continue;
+ }
+ }
+
// create container
RMContainer rmContainer = recoverAndCreateContainer(container, nm);
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java?rev=1619616&r1=1619615&r2=1619616&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java Thu Aug 21 22:45:35 2014
@@ -513,6 +513,19 @@ public class TestWorkPreservingRMRestart
// just-recovered containers.
assertNull(scheduler.getRMContainer(runningContainer.getContainerId()));
assertNull(scheduler.getRMContainer(completedContainer.getContainerId()));
+
+ rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
+
+ MockNM nm2 =
+ new MockNM("127.1.1.1:4321", 8192, rm2.getResourceTrackerService());
+ NMContainerStatus previousAttemptContainer =
+ TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 4,
+ ContainerState.RUNNING);
+ nm2.registerNode(Arrays.asList(previousAttemptContainer), null);
+ // Wait for RM to settle down on recovering containers.
+ Thread.sleep(3000);
+ // Containers from the previous failed attempt should not be recovered.
+ assertNull(scheduler.getRMContainer(previousAttemptContainer.getContainerId()));
}
// Apps already completed before RM restart. Restarted RM scheduler should not