You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sj...@apache.org on 2016/01/11 19:11:05 UTC
hadoop git commit: YARN-3995. Some of the NM events are not getting
published due race condition when AM container finishes in NM (Naganarasimha
G R via sjlee)
Repository: hadoop
Updated Branches:
refs/heads/feature-YARN-2928 00452d3c7 -> 36d74ec41
YARN-3995. Some of the NM events are not getting published due race condition when AM container finishes in NM (Naganarasimha G R via sjlee)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/36d74ec4
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/36d74ec4
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/36d74ec4
Branch: refs/heads/feature-YARN-2928
Commit: 36d74ec41f1aac97ff3138e8e893cd6ac5bab608
Parents: 00452d3
Author: Sangjin Lee <sj...@apache.org>
Authored: Mon Jan 11 10:09:34 2016 -0800
Committer: Sangjin Lee <sj...@apache.org>
Committed: Mon Jan 11 10:09:34 2016 -0800
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 +++
.../hadoop/yarn/conf/YarnConfiguration.java | 5 ++++
.../src/main/resources/yarn-default.xml | 7 ++++++
.../PerNodeTimelineCollectorsAuxService.java | 25 +++++++++++++-------
...TestPerNodeTimelineCollectorsAuxService.java | 11 +++++----
5 files changed, 38 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 5ff425c..cf4522a 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -208,6 +208,9 @@ Branch YARN-2928: Timeline Server Next Generation: Phase 1
YARN-4350. TestDistributedShell fails for V2 scenarios. (Naganarasimha G R
via varunsaxena)
+ YARN-3995. Some of the NM events are not getting published due race
+ condition when AM container finishes in NM (Naganarasimha G R via sjlee)
+
Trunk - Unreleased
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 6a3854a..da9acb1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1610,6 +1610,11 @@ public class YarnConfiguration extends Configuration {
public static final int
DEFAULT_TIMELINE_SERVICE_WRITER_FLUSH_INTERVAL_SECONDS = 60;
+ public static final String ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS =
+ TIMELINE_SERVICE_PREFIX + "app-collector.linger-period.ms";
+
+ public static final int DEFAULT_ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS = 1000;
+
// mark app-history related configs @Private as application history is going
// to be integrated into the timeline service
@Private
http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index a9adbbf..13b952e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1984,6 +1984,13 @@
<value>60</value>
</property>
+ <property>
+ <description>Time period till which the application collector will be alive
+ in NM, after the application master container finishes.</description>
+ <name>yarn.timeline-service.app-collector.linger-period.ms</name>
+ <value>1000</value>
+ </property>
+
<!-- Shared Cache Configuration -->
<property>
http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
index 0319e34..b738530 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
@@ -19,6 +19,9 @@
package org.apache.hadoop.yarn.server.timelineservice.collector;
import java.nio.ByteBuffer;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -54,6 +57,8 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
private static final int SHUTDOWN_HOOK_PRIORITY = 30;
private final NodeTimelineCollectorManager collectorManager;
+ private long collectorLingerPeriod;
+ private ScheduledExecutorService scheduler;
public PerNodeTimelineCollectorsAuxService() {
this(new NodeTimelineCollectorManager());
@@ -70,6 +75,10 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
if (!YarnConfiguration.timelineServiceV2Enabled(conf)) {
throw new YarnException("Timeline service v2 is not enabled");
}
+ collectorLingerPeriod =
+ conf.getLong(YarnConfiguration.ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS,
+ YarnConfiguration.DEFAULT_ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS);
+ scheduler = Executors.newSingleThreadScheduledExecutor();
collectorManager.init(conf);
super.serviceInit(conf);
}
@@ -82,6 +91,12 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
@Override
protected void serviceStop() throws Exception {
+ scheduler.shutdown();
+ if (!scheduler.awaitTermination(collectorLingerPeriod,
+ TimeUnit.MILLISECONDS)) {
+ LOG.warn(
+ "Scheduler terminated before removing the application collectors");
+ }
collectorManager.stop();
super.serviceStop();
}
@@ -141,17 +156,11 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
if (context.getContainerType() == ContainerType.APPLICATION_MASTER) {
final ApplicationId appId =
context.getContainerId().getApplicationAttemptId().getApplicationId();
- new Thread(new Runnable() {
+ scheduler.schedule(new Runnable() {
public void run() {
- try {
- // TODO Temporary Fix until solution for YARN-3995 is finalized.
- Thread.sleep(1000l);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
removeApplication(appId);
}
- }).start();
+ }, collectorLingerPeriod, TimeUnit.MILLISECONDS);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/36d74ec4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
index 4fdf47e..f2775d5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
@@ -22,12 +22,14 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
-import static org.mockito.Mockito.any;
+import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
+import java.io.IOException;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.Shell;
@@ -45,8 +47,6 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.GetTimelineCollectorCon
import org.junit.After;
import org.junit.Test;
-import java.io.IOException;
-
public class TestPerNodeTimelineCollectorsAuxService {
private ApplicationAttemptId appAttemptId;
private PerNodeTimelineCollectorsAuxService auxService;
@@ -103,8 +103,9 @@ public class TestPerNodeTimelineCollectorsAuxService {
when(context.getContainerType()).thenReturn(
ContainerType.APPLICATION_MASTER);
auxService.stopContainer(context);
-
- // TODO Temporary Fix until solution for YARN-3995 is finalized
+ // auxService should have the app's collector and need to remove only after
+ // a configured period
+ assertTrue(auxService.hasApplication(appAttemptId.getApplicationId()));
for (int i = 0; i < 4; i++) {
Thread.sleep(500l);
if (!auxService.hasApplication(appAttemptId.getApplicationId())) {