You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by eb...@apache.org on 2020/07/24 23:17:28 UTC
[hadoop] branch branch-2.10 updated: YARN-4771. Some containers can be skipped during log aggregation after NM restart. Contributed by Jason Lowe and Jim Brennan.
This is an automated email from the ASF dual-hosted git repository.
ebadger pushed a commit to branch branch-2.10
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-2.10 by this push:
new 2fae1c5 YARN-4771. Some containers can be skipped during log aggregation after NM restart. Contributed by Jason Lowe and Jim Brennan.
2fae1c5 is described below
commit 2fae1c5b0534deb3d1249a901d0530c69a57e333
Author: Eric Badger <eb...@verizonmedia.com>
AuthorDate: Fri Jul 24 23:08:31 2020 +0000
YARN-4771. Some containers can be skipped during log aggregation after NM
restart. Contributed by Jason Lowe and Jim Brennan.
(cherry picked from commit ac5f21dbef0f0ad4210e4027f53877760fa606a5)
---
.../server/nodemanager/NodeStatusUpdaterImpl.java | 11 ++++++----
.../server/nodemanager/TestNodeStatusUpdater.java | 24 ++++++++++------------
2 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index af59e34..0b40dd5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -737,8 +737,13 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
while (i.hasNext()) {
Entry<ContainerId, Long> mapEntry = i.next();
ContainerId cid = mapEntry.getKey();
- if (mapEntry.getValue() < currentTime) {
- if (!context.getContainers().containsKey(cid)) {
+ if (mapEntry.getValue() >= currentTime) {
+ break;
+ }
+ if (!context.getContainers().containsKey(cid)) {
+ ApplicationId appId =
+ cid.getApplicationAttemptId().getApplicationId();
+ if (isApplicationStopped(appId)) {
i.remove();
try {
context.getNMStateStore().removeContainer(cid);
@@ -746,8 +751,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
LOG.error("Unable to remove container " + cid + " in store", e);
}
}
- } else {
- break;
}
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
index 533cf2a..0d73a72 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
@@ -901,9 +901,8 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
public void testRecentlyFinishedContainers() throws Exception {
NodeManager nm = new NodeManager();
YarnConfiguration conf = new YarnConfiguration();
- conf.set(
- NodeStatusUpdaterImpl.YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS,
- "10000");
+ conf.setInt(NodeStatusUpdaterImpl.
+ YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS, 1);
nm.init(conf);
NodeStatusUpdaterImpl nodeStatusUpdater =
(NodeStatusUpdaterImpl) nm.getNodeStatusUpdater();
@@ -918,18 +917,17 @@ public class TestNodeStatusUpdater extends NodeManagerTestBase {
nodeStatusUpdater.addCompletedContainer(cId);
Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped(cId));
+ // verify container remains even after expiration if app
+ // is still active
nm.getNMContext().getContainers().remove(cId);
- long time1 = System.currentTimeMillis();
- int waitInterval = 15;
- while (waitInterval-- > 0
- && nodeStatusUpdater.isContainerRecentlyStopped(cId)) {
- nodeStatusUpdater.removeVeryOldStoppedContainersFromCache();
- Thread.sleep(1000);
- }
- long time2 = System.currentTimeMillis();
- // By this time the container will be removed from cache. need to verify.
+ Thread.sleep(10);
+ nodeStatusUpdater.removeVeryOldStoppedContainersFromCache();
+ Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped(cId));
+
+ // complete the application and verify container is removed
+ nm.getNMContext().getApplications().remove(appId);
+ nodeStatusUpdater.removeVeryOldStoppedContainersFromCache();
Assert.assertFalse(nodeStatusUpdater.isContainerRecentlyStopped(cId));
- Assert.assertTrue((time2 - time1) >= 10000 && (time2 - time1) <= 250000);
}
@Test(timeout = 90000)
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org