You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ro...@apache.org on 2019/08/29 04:07:22 UTC

[hadoop] branch branch-3.2 updated: YARN-9640. Slow event processing could cause too many attempt unregister events. Contributed by Bibin A Chundatt.

This is an automated email from the ASF dual-hosted git repository.

rohithsharmaks pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 81c0809  YARN-9640. Slow event processing could cause too many attempt unregister events. Contributed by Bibin A Chundatt.
81c0809 is described below

commit 81c08094634776c069f8595be7519af5681a9bd5
Author: Rohith Sharma K S <ro...@apache.org>
AuthorDate: Thu Aug 29 09:30:20 2019 +0530

    YARN-9640. Slow event processing could cause too many attempt unregister events. Contributed by Bibin A Chundatt.
---
 .../resourcemanager/ApplicationMasterService.java  | 14 ++++--
 .../TestApplicationMasterService.java              | 54 ++++++++++++++++++++++
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
index d9527d4..883da10 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
@@ -93,6 +93,8 @@ public class ApplicationMasterService extends AbstractService implements
       RecordFactoryProvider.getRecordFactory(null);
   private final ConcurrentMap<ApplicationAttemptId, AllocateResponseLock> responseMap =
       new ConcurrentHashMap<ApplicationAttemptId, AllocateResponseLock>();
+  private final ConcurrentHashMap<ApplicationAttemptId, Boolean>
+      finishedAttemptCache = new ConcurrentHashMap<>();
   protected final RMContext rmContext;
   private final AMSProcessingChain amsProcessingChain;
   private boolean timelineServiceV2Enabled;
@@ -337,11 +339,14 @@ public class ApplicationMasterService extends AbstractService implements
         throw new ApplicationMasterNotRegisteredException(message);
       }
 
-      this.amLivelinessMonitor.receivedPing(applicationAttemptId);
       FinishApplicationMasterResponse response =
           FinishApplicationMasterResponse.newInstance(false);
-      this.amsProcessingChain.finishApplicationMaster(
-          applicationAttemptId, request, response);
+      if (finishedAttemptCache.putIfAbsent(applicationAttemptId, true)
+          == null) {
+        this.amsProcessingChain
+            .finishApplicationMaster(applicationAttemptId, request, response);
+      }
+      this.amLivelinessMonitor.receivedPing(applicationAttemptId);
       return response;
     }
   }
@@ -490,6 +495,7 @@ public class ApplicationMasterService extends AbstractService implements
   public void unregisterAttempt(ApplicationAttemptId attemptId) {
     LOG.info("Unregistering app attempt : " + attemptId);
     responseMap.remove(attemptId);
+    finishedAttemptCache.remove(attemptId); // drop the putIfAbsent finish marker
     rmContext.getNMTokenSecretManager().unregisterApplicationAttempt(attemptId);
   }
 
@@ -504,6 +510,8 @@ public class ApplicationMasterService extends AbstractService implements
     if (this.server != null) {
       this.server.stop();
     }
+    responseMap.clear();
+    finishedAttemptCache.clear();
     super.serviceStop();
   }
   
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
index 562ba5d..bfe908d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
@@ -60,6 +60,9 @@ import org.apache.hadoop.yarn.api.records.ResourceInformation;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.api.records.UpdateContainerRequest;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.DrainDispatcher;
+import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException;
 import org.apache.hadoop.yarn.exceptions.InvalidContainerReleaseException;
 import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
@@ -70,6 +73,7 @@ import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.TestResourceProfiles;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent;
@@ -990,4 +994,54 @@ public class TestApplicationMasterService {
         app1.getApplicationId()).getOriginalTrackingUrl());
     rm.stop();
   }
+
+  @Test(timeout = 120000)
+  public void testRepeatedFinishApplicationMaster() throws Exception {
+
+    CountingDispatcher dispatcher = new CountingDispatcher(); // counts UNREGISTERED
+    MockRM rm = new MockRM(conf) {
+      @Override
+      protected Dispatcher createDispatcher() {
+        return dispatcher; // hand MockRM the counting dispatcher
+      }
+    };
+
+    try {
+      rm.start();
+      // Register node1
+      MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB);
+      // Submit an application
+      RMApp app1 = rm.submitApp(2048);
+      MockAM am1 = MockRM.launchAM(app1, rm, nm1);
+      am1.registerAppAttempt();
+      FinishApplicationMasterRequest req = FinishApplicationMasterRequest
+          .newInstance(FinalApplicationStatus.FAILED, "", "");
+      for (int i = 0; i < 10; i++) { // send the same finish request ten times
+        am1.unregisterAppAttempt(req, false);
+      }
+      Assert.assertEquals("Expecting only one event", 1,
+          dispatcher.getEventCount()); // ten requests, one UNREGISTERED expected
+    } finally {
+      rm.stop(); // always stop the RM, even when the assertion fails
+    }
+  }
+
+  static class CountingDispatcher extends DrainDispatcher {
+    private int eventreceived = 0; // UNREGISTERED events intercepted so far
+
+    @SuppressWarnings("rawtypes")
+    @Override
+    protected void dispatch(Event event) {
+      if (event.getType() == RMAppAttemptEventType.UNREGISTERED) {
+        eventreceived++; // counted only; not forwarded to super.dispatch
+      } else {
+        super.dispatch(event); // every other event type is dispatched normally
+      }
+    }
+
+    public int getEventCount() {
+      return eventreceived; // NOTE(review): unsynchronized read - confirm threading
+    }
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org