You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by bi...@apache.org on 2021/01/08 05:23:18 UTC

[hadoop] branch trunk updated: YARN-10538: Add RECOMMISSIONING nodes to the list of updated nodes returned to the AM (#2564)

This is an automated email from the ASF dual-hosted git repository.

bibinchundatt pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 1b17910  YARN-10538: Add RECOMMISSIONING nodes to the list of updated nodes returned to the AM (#2564)
1b17910 is described below

commit 1b1791075a8a45ff8c7d25d52cb014d0ce858cc2
Author: srinivasst <40...@users.noreply.github.com>
AuthorDate: Fri Jan 8 10:52:52 2021 +0530

    YARN-10538: Add RECOMMISSIONING nodes to the list of updated nodes returned to the AM (#2564)
    
    Contributed by Srinivas S T
---
 .../server/resourcemanager/rmnode/RMNodeImpl.java  |  7 +++
 .../TestAMRMRPCNodeUpdates.java                    | 54 ++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
index fc7e88b..cec9915 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
@@ -1325,6 +1325,13 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
           .handle(
               new NodeResourceUpdateSchedulerEvent(rmNode, ResourceOption
                   .newInstance(rmNode.totalCapability, 0)));
+
+      // Notify NodesListManager to notify all RMApps that this node has been
+      // recommissioned so that each Application Master can take any required
+      // actions.
+      rmNode.context.getDispatcher().getEventHandler().handle(
+              new NodesListManagerEvent(
+                      NodesListManagerEventType.NODE_USABLE, rmNode));
     }
   }
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCNodeUpdates.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCNodeUpdates.java
index a14130f..c4291b6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCNodeUpdates.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCNodeUpdates.java
@@ -22,6 +22,7 @@ import java.security.PrivilegedExceptionAction;
 import java.util.List;
 
 import org.apache.hadoop.yarn.server.resourcemanager.MockRMAppSubmitter;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
 import org.junit.Assert;
 
 import org.apache.hadoop.conf.Configuration;
@@ -91,6 +92,12 @@ public class TestAMRMRPCNodeUpdates {
     rm.drainEvents();
   }
 
+  private void syncNodeRecommissioning(MockNM nm) throws Exception {
+    rm.sendNodeEvent(nm, RMNodeEventType.RECOMMISSION);
+    rm.waitForState(nm.getNodeId(), NodeState.RUNNING);
+    rm.drainEvents();
+  }
+
   private AllocateResponse allocate(final ApplicationAttemptId attemptId,
       final AllocateRequest req) throws Exception {
     UserGroupInformation ugi =
@@ -141,6 +148,53 @@ public class TestAMRMRPCNodeUpdates {
   }
 
   @Test
+  public void testAMRMRecommissioningNodes() throws Exception {
+    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10000);
+    MockNM nm2 = rm.registerNode("127.0.0.2:1234", 10000);
+    rm.drainEvents();
+
+    RMApp app1 = MockRMAppSubmitter.submitWithMemory(2000, rm);
+
+    // Trigger the scheduling so the AM gets 'launched' on nm1
+    nm1.nodeHeartbeat(true);
+
+    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
+    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
+
+    // register AM returns no unusable node
+    am1.registerAppAttempt();
+
+    // Gracefully DECOMMISSION nm2 with a timeout
+    Integer decommissioningTimeout = 600;
+    syncNodeGracefulDecommission(nm2, decommissioningTimeout);
+
+    AllocateRequest allocateRequest1 =
+            AllocateRequest.newInstance(0, 0F, null, null, null);
+    AllocateResponse response1 =
+            allocate(attempt1.getAppAttemptId(), allocateRequest1);
+    List<NodeReport> updatedNodes = response1.getUpdatedNodes();
+    Assert.assertEquals(1, updatedNodes.size());
+    NodeReport nr = updatedNodes.iterator().next();
+    Assert.assertEquals(
+            decommissioningTimeout, nr.getDecommissioningTimeout());
+    Assert.assertEquals(
+            NodeUpdateType.NODE_DECOMMISSIONING, nr.getNodeUpdateType());
+
+    // RECOMMISSION nm2 and wait for it to return to RUNNING
+    syncNodeRecommissioning(nm2);
+
+    AllocateRequest allocateRequest2 = AllocateRequest
+            .newInstance(response1.getResponseId(), 0F, null, null, null);
+    AllocateResponse response2 =
+            allocate(attempt1.getAppAttemptId(), allocateRequest2);
+    List<NodeReport> updatedNodes2 = response2.getUpdatedNodes();
+    Assert.assertEquals(1, updatedNodes2.size());
+    NodeReport nr2 = updatedNodes2.iterator().next();
+    Assert.assertEquals(
+            NodeUpdateType.NODE_USABLE, nr2.getNodeUpdateType());
+  }
+
+  @Test
   public void testAMRMUnusableNodes() throws Exception {
     
     MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10000);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org