You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ha...@apache.org on 2018/05/14 18:09:38 UTC

hadoop git commit: YARN-8130 Race condition when container events are published for KILLED applications. (Rohith Sharma K S via Haibo Chen)

Repository: hadoop
Updated Branches:
  refs/heads/trunk 6beb25ab7 -> 2d00a0c71


YARN-8130 Race condition when container events are published for KILLED applications. (Rohith Sharma K S via Haibo Chen)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2d00a0c7
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2d00a0c7
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2d00a0c7

Branch: refs/heads/trunk
Commit: 2d00a0c71b5dde31e2cf8fcb96d9d541d41fb879
Parents: 6beb25a
Author: Haibo Chen <ha...@apache.org>
Authored: Mon May 14 11:08:42 2018 -0700
Committer: Haibo Chen <ha...@apache.org>
Committed: Mon May 14 11:08:42 2018 -0700

----------------------------------------------------------------------
 .../timelineservice/NMTimelineEvent.java        |  12 ++-
 .../timelineservice/NMTimelineEventType.java    |   3 +
 .../timelineservice/NMTimelinePublisher.java    |  23 +++--
 .../TestNMTimelinePublisher.java                | 102 ++++++++++++++++---
 4 files changed, 113 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d00a0c7/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEvent.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEvent.java
index f275b37..1ee27d2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEvent.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEvent.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.timelineservice;
 
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.event.AbstractEvent;
 
 /**
@@ -25,11 +26,14 @@ import org.apache.hadoop.yarn.event.AbstractEvent;
  * timelineservice v2.
  */
 public class NMTimelineEvent extends AbstractEvent<NMTimelineEventType> {
-  public NMTimelineEvent(NMTimelineEventType type) {
-    super(type);
+  private ApplicationId appId;
+
+  public NMTimelineEvent(NMTimelineEventType type, ApplicationId appId) {
+    super(type, System.currentTimeMillis());
+    this.appId=appId;
   }
 
-  public NMTimelineEvent(NMTimelineEventType type, long timestamp) {
-    super(type, timestamp);
+  public ApplicationId getApplicationId() {
+    return appId;
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d00a0c7/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEventType.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEventType.java
index b4ae45a..5d81c94 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelineEventType.java
@@ -24,4 +24,7 @@ package org.apache.hadoop.yarn.server.nodemanager.timelineservice;
 public enum NMTimelineEventType {
   // Publish the NM Timeline entity
   TIMELINE_ENTITY_PUBLISH,
+
+  // Stop and remove timeline client
+  STOP_TIMELINE_CLIENT
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d00a0c7/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java
index 13d5c67..f451726 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java
@@ -96,7 +96,7 @@ public class NMTimelinePublisher extends CompositeService {
 
   @Override
   protected void serviceInit(Configuration conf) throws Exception {
-    dispatcher = new AsyncDispatcher("NM Timeline dispatcher");
+    dispatcher = createDispatcher();
     dispatcher.register(NMTimelineEventType.class,
         new ForwardingEventHandler());
     addIfService(dispatcher);
@@ -113,6 +113,10 @@ public class NMTimelinePublisher extends CompositeService {
     super.serviceInit(conf);
   }
 
+  protected AsyncDispatcher createDispatcher() {
+    return new AsyncDispatcher("NM Timeline dispatcher");
+  }
+
   @Override
   protected void serviceStart() throws Exception {
     super.serviceStart();
@@ -141,6 +145,9 @@ public class NMTimelinePublisher extends CompositeService {
       putEntity(((TimelinePublishEvent) event).getTimelineEntityToPublish(),
           ((TimelinePublishEvent) event).getApplicationId());
       break;
+    case STOP_TIMELINE_CLIENT:
+      removeAndStopTimelineClient(event.getApplicationId());
+      break;
     default:
       LOG.error("Unknown NMTimelineEvent type: " + event.getType());
     }
@@ -392,20 +399,13 @@ public class NMTimelinePublisher extends CompositeService {
   }
 
   private static class TimelinePublishEvent extends NMTimelineEvent {
-    private ApplicationId appId;
     private TimelineEntity entityToPublish;
 
     public TimelinePublishEvent(TimelineEntity entity, ApplicationId appId) {
-      super(NMTimelineEventType.TIMELINE_ENTITY_PUBLISH, System
-          .currentTimeMillis());
-      this.appId = appId;
+      super(NMTimelineEventType.TIMELINE_ENTITY_PUBLISH, appId);
       this.entityToPublish = entity;
     }
 
-    public ApplicationId getApplicationId() {
-      return appId;
-    }
-
     public TimelineEntity getTimelineEntityToPublish() {
       return entityToPublish;
     }
@@ -434,6 +434,11 @@ public class NMTimelinePublisher extends CompositeService {
   }
 
   public void stopTimelineClient(ApplicationId appId) {
+    dispatcher.getEventHandler().handle(
+        new NMTimelineEvent(NMTimelineEventType.STOP_TIMELINE_CLIENT, appId));
+  }
+
+  private void removeAndStopTimelineClient(ApplicationId appId) {
     TimelineV2Client client = appToClientMap.remove(appId);
     if (client != null) {
       client.stop();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d00a0c7/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/TestNMTimelinePublisher.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/TestNMTimelinePublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/TestNMTimelinePublisher.java
index 43196c7..2585262 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/TestNMTimelinePublisher.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/TestNMTimelinePublisher.java
@@ -31,34 +31,47 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.timelineservice.ContainerEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineMetric;
 import org.apache.hadoop.yarn.client.api.impl.TimelineV2ClientImpl;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.event.DrainDispatcher;
 import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.metrics.ContainerMetricsConstants;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerFinishedEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
 import org.junit.Assert;
 import org.junit.Test;
+import org.junit.After;
+import org.junit.Before;
 
 public class TestNMTimelinePublisher {
   private static final String MEMORY_ID = "MEMORY";
   private static final String CPU_ID = "CPU";
 
-  @Test
-  public void testContainerResourceUsage() {
-    Context context = mock(Context.class);
-    @SuppressWarnings("unchecked")
-    final DummyTimelineClient timelineClient = new DummyTimelineClient(null);
-    when(context.getNodeId()).thenReturn(NodeId.newInstance("localhost", 0));
+  private NMTimelinePublisher publisher;
+  private DummyTimelineClient timelineClient;
+  private Configuration conf;
+  private DrainDispatcher dispatcher;
 
-    Configuration conf = new Configuration();
+
+  @Before public void setup() throws Exception {
+    conf = new Configuration();
     conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
     conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 2.0f);
+    conf.setLong(YarnConfiguration.ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS,
+        3000L);
+    timelineClient = new DummyTimelineClient(null);
+    Context context = createMockContext();
+    dispatcher = new DrainDispatcher();
 
-    NMTimelinePublisher publisher = new NMTimelinePublisher(context) {
+    publisher = new NMTimelinePublisher(context) {
       public void createTimelineClient(ApplicationId appId) {
         if (!getAppToClientMap().containsKey(appId)) {
           timelineClient.init(getConfig());
@@ -66,15 +79,73 @@ public class TestNMTimelinePublisher {
           getAppToClientMap().put(appId, timelineClient);
         }
       }
+
+      @Override protected AsyncDispatcher createDispatcher() {
+        return dispatcher;
+      }
     };
     publisher.init(conf);
     publisher.start();
+  }
+
+  private Context createMockContext() {
+    Context context = mock(Context.class);
+    when(context.getNodeId()).thenReturn(NodeId.newInstance("localhost", 0));
+    return context;
+  }
+
+  @After public void tearDown() throws Exception {
+    if (publisher != null) {
+      publisher.stop();
+    }
+    if (timelineClient != null) {
+      timelineClient.stop();
+    }
+  }
+
+  @Test public void testPublishContainerFinish() throws Exception {
+    ApplicationId appId = ApplicationId.newInstance(0, 2);
+    ApplicationAttemptId appAttemptId =
+        ApplicationAttemptId.newInstance(appId, 1);
+    ContainerId cId = ContainerId.newContainerId(appAttemptId, 1);
+
+    String diag = "test-diagnostics";
+    int exitStatus = 0;
+    ContainerStatus cStatus = mock(ContainerStatus.class);
+    when(cStatus.getContainerId()).thenReturn(cId);
+    when(cStatus.getDiagnostics()).thenReturn(diag);
+    when(cStatus.getExitStatus()).thenReturn(exitStatus);
+    long timeStamp = System.currentTimeMillis();
+
+    ApplicationContainerFinishedEvent finishedEvent =
+        new ApplicationContainerFinishedEvent(cStatus, timeStamp);
+
+    publisher.createTimelineClient(appId);
+    publisher.publishApplicationEvent(finishedEvent);
+    publisher.stopTimelineClient(appId);
+    dispatcher.await();
+
+    ContainerEntity cEntity = new ContainerEntity();
+    cEntity.setId(cId.toString());
+    TimelineEntity[] lastPublishedEntities =
+        timelineClient.getLastPublishedEntities();
+
+    Assert.assertNotNull(lastPublishedEntities);
+    Assert.assertEquals(1, lastPublishedEntities.length);
+    TimelineEntity entity = lastPublishedEntities[0];
+    Assert.assertTrue(cEntity.equals(entity));
+    Assert.assertEquals(diag,
+        entity.getInfo().get(ContainerMetricsConstants.DIAGNOSTICS_INFO));
+    Assert.assertEquals(exitStatus,
+        entity.getInfo().get(ContainerMetricsConstants.EXIT_STATUS_INFO));
+  }
+
+  @Test public void testContainerResourceUsage() {
     ApplicationId appId = ApplicationId.newInstance(0, 1);
     publisher.createTimelineClient(appId);
     Container aContainer = mock(Container.class);
-    when(aContainer.getContainerId()).thenReturn(ContainerId.newContainerId(
-        ApplicationAttemptId.newInstance(appId, 1),
-        0L));
+    when(aContainer.getContainerId()).thenReturn(ContainerId
+        .newContainerId(ApplicationAttemptId.newInstance(appId, 1), 0L));
     publisher.reportContainerResourceUsage(aContainer, 1024L, 8F);
     verifyPublishedResourceUsageMetrics(timelineClient, 1024L, 8);
     timelineClient.reset();
@@ -91,7 +162,6 @@ public class TestNMTimelinePublisher {
         (float) ResourceCalculatorProcessTree.UNAVAILABLE);
     verifyPublishedResourceUsageMetrics(timelineClient, 1024L,
         ResourceCalculatorProcessTree.UNAVAILABLE);
-    publisher.stop();
   }
 
   private void verifyPublishedResourceUsageMetrics(
@@ -151,8 +221,12 @@ public class TestNMTimelinePublisher {
 
     private TimelineEntity[] lastPublishedEntities;
 
-    @Override
-    public void putEntitiesAsync(TimelineEntity... entities)
+    @Override public void putEntitiesAsync(TimelineEntity... entities)
+        throws IOException, YarnException {
+      this.lastPublishedEntities = entities;
+    }
+
+    @Override public void putEntities(TimelineEntity... entities)
         throws IOException, YarnException {
       this.lastPublishedEntities = entities;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org