You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ww...@apache.org on 2019/02/24 14:31:35 UTC

[hadoop] branch branch-3.2 updated: YARN-9238. Avoid allocating opportunistic containers to previous/removed/non-exist application attempt. Contributed by lujie.

This is an automated email from the ASF dual-hosted git repository.

wwei pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 604a915  YARN-9238. Avoid allocating opportunistic containers to previous/removed/non-exist application attempt. Contributed by lujie.
604a915 is described below

commit 604a915babf02ff4c5096e0b96af5713b0fc4faf
Author: Weiwei Yang <ww...@apache.org>
AuthorDate: Fri Feb 22 20:58:33 2019 +0800

    YARN-9238. Avoid allocating opportunistic containers to previous/removed/non-exist application attempt. Contributed by lujie.
    
    (cherry picked from commit 9c88695bcda0ffe4c7f49d643c649dfa1dce9bde)
---
 .../OpportunisticContainerAllocatorAMService.java  |  6 ++++
 ...stOpportunisticContainerAllocatorAMService.java | 32 ++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java
index 69a704e..562b54a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java
@@ -172,6 +172,12 @@ public class OpportunisticContainerAllocatorAMService
           ((AbstractYarnScheduler)rmContext.getScheduler())
               .getApplicationAttempt(appAttemptId);
 
+      if (!appAttempt.getApplicationAttemptId().equals(appAttemptId)){
+        LOG.error("Calling allocate on previous or removed or non "
+            + "existent application attempt " + appAttemptId);
+        return;
+      }
+
       OpportunisticContainerContext oppCtx =
           appAttempt.getOpportunisticContainerContext();
       oppCtx.updateNodeList(getLeastLoadedNodes());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestOpportunisticContainerAllocatorAMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestOpportunisticContainerAllocatorAMService.java
index b2be1e7..221181d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestOpportunisticContainerAllocatorAMService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestOpportunisticContainerAllocatorAMService.java
@@ -81,10 +81,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
@@ -750,6 +753,35 @@ public class TestOpportunisticContainerAllocatorAMService {
   }
 
   @Test(timeout = 60000)
+  public void testAMCrashDuringAllocate() throws Exception {
+    MockNM nm = new MockNM("h:1234", 4096, rm.getResourceTrackerService());
+    nm.registerNode();
+
+    RMApp app = rm.submitApp(1 * GB, "app", "user", null, "default");
+    ApplicationAttemptId attemptId0 =
+        app.getCurrentAppAttempt().getAppAttemptId();
+    MockAM am = MockRM.launchAndRegisterAM(app, rm, nm);
+
+    //simulate AM crash by replacing the current attempt
+    //Do not use rm.failApplicationAttempt, the bug will skip due to
+    //ApplicationDoesNotExistInCacheException
+    CapacityScheduler scheduler= ((CapacityScheduler) rm.getRMContext().
+        getScheduler());
+    SchedulerApplication<FiCaSchedulerApp> schApp =
+        (SchedulerApplication<FiCaSchedulerApp>)scheduler.
+        getSchedulerApplications().get(attemptId0.getApplicationId());
+    final ApplicationAttemptId appAttemptId1 = TestUtils.
+        getMockApplicationAttemptId(1, 1);
+    schApp.setCurrentAppAttempt(new FiCaSchedulerApp(appAttemptId1,
+        null, scheduler.getQueue("default"), null, rm.getRMContext()));
+
+    //start to allocate
+    am.allocate(
+        Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1),
+        "*", Resources.createResource(1 * GB), 2)), null);
+  }
+
+  @Test(timeout = 60000)
   public void testNodeRemovalDuringAllocate() throws Exception {
     MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
     MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org