You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ep...@apache.org on 2018/09/28 15:32:46 UTC
hadoop git commit: YARN-8774. Memory leak when CapacityScheduler
allocates from reserved container with non-default label. Contributed by Tao
Yang.
Repository: hadoop
Updated Branches:
refs/heads/trunk 7093afd87 -> 8598b498b
YARN-8774. Memory leak when CapacityScheduler allocates from reserved container with non-default label. Contributed by Tao Yang.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8598b498
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8598b498
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8598b498
Branch: refs/heads/trunk
Commit: 8598b498bcaf4deffa822f871a26635bdf3d9d5c
Parents: 7093afd
Author: Eric E Payne <er...@oath.com>
Authored: Fri Sep 28 15:32:07 2018 +0000
Committer: Eric E Payne <er...@oath.com>
Committed: Fri Sep 28 15:32:07 2018 +0000
----------------------------------------------------------------------
.../allocator/RegularContainerAllocator.java | 18 +++++-
.../TestNodeLabelContainerAllocation.java | 62 ++++++++++++++++++++
2 files changed, 79 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8598b498/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
index c0a11a0..c692fdf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
@@ -750,9 +750,25 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
// When reserving container
RMContainer updatedContainer = reservedContainer;
if (updatedContainer == null) {
+ AppPlacementAllocator<FiCaSchedulerNode> ps =
+ application.getAppSchedulingInfo()
+ .getAppPlacementAllocator(schedulerKey);
+ if (null == ps) {
+ LOG.warn("Failed to get " + AppPlacementAllocator.class.getName()
+ + " for application=" + application.getApplicationId()
+ + " schedulerRequestKey=" + schedulerKey);
+ ActivitiesLogger.APP
+ .recordAppActivityWithoutAllocation(activitiesManager, node,
+ application, schedulerKey.getPriority(),
+ ActivityDiagnosticConstant.
+ PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST,
+ ActivityState.REJECTED);
+ return ContainerAllocation.PRIORITY_SKIPPED;
+ }
updatedContainer = new RMContainerImpl(container, schedulerKey,
application.getApplicationAttemptId(), node.getNodeID(),
- application.getAppSchedulingInfo().getUser(), rmContext);
+ application.getAppSchedulingInfo().getUser(), rmContext,
+ ps.getPrimaryRequestedNodePartition());
}
allocationResult.updatedContainer = updatedContainer;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/8598b498/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
index 1836919..9cfddd6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
@@ -547,6 +547,68 @@ public class TestNodeLabelContainerAllocation {
rm1.close();
}
+ @Test (timeout = 120000)
+ public void testRMContainerLeakInLeafQueue() throws Exception {
+ // set node -> label
+ mgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x"));
+ mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x"),
+ NodeId.newInstance("h2", 0), toSet("x")));
+
+ // inject node label manager
+ MockRM rm1 =
+ new MockRM(TestUtils.getConfigurationWithDefaultQueueLabels(conf)) {
+ @Override public RMNodeLabelsManager createNodeLabelManager() {
+ return mgr;
+ }
+ };
+
+ rm1.getRMContext().setNodeLabelManager(mgr);
+ rm1.start();
+ MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); // label = x
+ rm1.registerNode("h2:1234", 8 * GB); // label = x
+
+ // launch an app to queue a1 (label = x), and check that all containers will
+ // be allocated on h1
+ RMApp app1 = rm1.submitApp(1 * GB, "app1", "user", null, "a1");
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+ RMApp app2 = rm1.submitApp(1 * GB, "app2", "user", null, "a1");
+ MockRM.launchAndRegisterAM(app2, rm1, nm1);
+
+ // request a container.
+ am1.allocate("*", 7 * GB, 2, new ArrayList<ContainerId>());
+
+ CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
+ RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
+ LeafQueue leafQueue = (LeafQueue) cs.getQueue("a1");
+
+ // Do one node heartbeat;
+ // the scheduler will reserve a container for app1
+ cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+
+ // Check if a 4G container allocated for app1, and 4G is reserved
+ FiCaSchedulerApp schedulerApp1 =
+ cs.getApplicationAttempt(am1.getApplicationAttemptId());
+ Assert.assertEquals(1, schedulerApp1.getLiveContainers().size());
+ Assert.assertEquals(1, schedulerApp1.getReservedContainers().size());
+
+ // kill app2 then do node heartbeat 1 time
+ // scheduler will allocate a container from the reserved container on nm1
+ rm1.killApp(app2.getApplicationId());
+ rm1.waitForState(app2.getApplicationId(), RMAppState.KILLED);
+ cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+ Assert.assertEquals(2, schedulerApp1.getLiveContainers().size());
+ Assert.assertEquals(0, schedulerApp1.getReservedContainers().size());
+
+ // After killing app1, LeafQueue#ignorePartitionExclusivityRMContainers should
+ // be empty; otherwise, a resource leak has occurred
+ rm1.killApp(app1.getApplicationId());
+ rm1.waitForState(app1.getApplicationId(), RMAppState.KILLED);
+ Assert.assertEquals(0, leafQueue.getIgnoreExclusivityRMContainers().size());
+
+ rm1.close();
+ }
+
private void checkPendingResource(MockRM rm, int priority,
ApplicationAttemptId attemptId, int memory) {
CapacityScheduler cs = (CapacityScheduler) rm.getRMContext().getScheduler();
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org