You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ep...@apache.org on 2016/12/09 20:39:42 UTC
[4/4] hadoop git commit: YARN-4390. Do surgical preemption based on
reserved container in CapacityScheduler. Contributed by Wangda Tan
YARN-4390. Do surgical preemption based on reserved container in CapacityScheduler. Contributed by Wangda Tan
(cherry picked from commit bb62e0592566b2fcae7136b30972aad2d3ac55b0)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/40367c8d
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/40367c8d
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/40367c8d
Branch: refs/heads/branch-2.8
Commit: 40367c8da3a4798b004714c3fbd69d84c1dc84f5
Parents: bced9e2
Author: Jian He <ji...@apache.org>
Authored: Thu May 5 12:56:21 2016 -0700
Committer: Eric Payne <ep...@apache.org>
Committed: Fri Dec 9 20:33:54 2016 +0000
----------------------------------------------------------------------
.../monitor/SchedulingMonitor.java | 16 +-
.../CapacitySchedulerPreemptionUtils.java | 35 +-
.../capacity/FifoCandidatesSelector.java | 26 +-
.../capacity/PreemptableResourceCalculator.java | 139 ++--
.../ProportionalCapacityPreemptionPolicy.java | 96 ++-
.../ReservedContainerCandidatesSelector.java | 316 +++++++++
.../monitor/capacity/TempQueuePerPartition.java | 184 ++++-
.../rmcontainer/RMContainer.java | 2 +
.../rmcontainer/RMContainerImpl.java | 10 +
.../scheduler/AbstractYarnScheduler.java | 2 +-
.../scheduler/SchedulerApplicationAttempt.java | 4 +
.../scheduler/SchedulerNode.java | 21 +-
.../scheduler/capacity/CapacityScheduler.java | 9 +-
.../CapacitySchedulerConfiguration.java | 24 +-
.../scheduler/common/fica/FiCaSchedulerApp.java | 1 +
.../common/fica/FiCaSchedulerNode.java | 7 +-
.../scheduler/fair/FSAppAttempt.java | 1 +
.../scheduler/fair/FairScheduler.java | 3 +-
.../scheduler/fifo/FifoScheduler.java | 2 +-
...alCapacityPreemptionPolicyMockFramework.java | 687 +++++++++++++++++++
...estProportionalCapacityPreemptionPolicy.java | 61 +-
...pacityPreemptionPolicyForNodePartitions.java | 687 +------------------
...tyPreemptionPolicyForReservedContainers.java | 430 ++++++++++++
...alCapacityPreemptionPolicyMockFramework.java | 247 +++++++
.../TestSchedulerApplicationAttempt.java | 3 +-
.../CapacitySchedulerPreemptionTestBase.java | 149 ++++
.../capacity/TestCapacityScheduler.java | 2 +-
.../TestCapacitySchedulerLazyPreemption.java | 639 +++++++++++++++++
.../TestCapacitySchedulerPreemption.java | 683 ------------------
...TestCapacitySchedulerSurgicalPreemption.java | 246 +++++++
.../scheduler/fair/TestFairScheduler.java | 2 +-
.../scheduler/fifo/TestFifoScheduler.java | 2 +-
32 files changed, 3193 insertions(+), 1543 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java
index 55ec858..03e180d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java
@@ -21,6 +21,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
@@ -84,10 +85,17 @@ public class SchedulingMonitor extends AbstractService {
@Override
public void run() {
while (!stopped && !Thread.currentThread().isInterrupted()) {
- //invoke the preemption policy at a regular pace
- //the policy will generate preemption or kill events
- //managed by the dispatcher
- invokePolicy();
+ try {
+ //invoke the preemption policy at a regular pace
+ //the policy will generate preemption or kill events
+ //managed by the dispatcher
+ invokePolicy();
+ } catch (YarnRuntimeException e) {
+ LOG.error("YarnRuntimeException raised while executing preemption"
+ + " checker, skip this run..., exception=", e);
+ }
+
+ // Wait before next run
try {
Thread.sleep(monitorInterval);
} catch (InterruptedException e) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java
index a71f108..42d8730 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java
@@ -21,9 +21,11 @@ package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.util.resource.Resources;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@@ -40,9 +42,9 @@ public class CapacitySchedulerPreemptionUtils {
// Only add resToObtainByPartition when actuallyToBePreempted resource >= 0
if (Resources.greaterThan(context.getResourceCalculator(),
- clusterResource, qT.actuallyToBePreempted, Resources.none())) {
+ clusterResource, qT.getActuallyToBePreempted(), Resources.none())) {
resToObtainByPartition.put(qT.partition,
- Resources.clone(qT.actuallyToBePreempted));
+ Resources.clone(qT.getActuallyToBePreempted()));
}
}
@@ -62,4 +64,33 @@ public class CapacitySchedulerPreemptionUtils {
}
return containers.contains(container);
}
+
+ public static void deductPreemptableResourcesBasedSelectedCandidates(
+ CapacitySchedulerPreemptionContext context,
+ Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates) {
+ for (Set<RMContainer> containers : selectedCandidates.values()) {
+ for (RMContainer c : containers) {
+ SchedulerNode schedulerNode = context.getScheduler().getSchedulerNode(
+ c.getAllocatedNode());
+ if (null == schedulerNode) {
+ continue;
+ }
+
+ String partition = schedulerNode.getPartition();
+ String queue = c.getQueueName();
+ TempQueuePerPartition tq = context.getQueueByPartition(queue,
+ partition);
+
+ Resource res = c.getReservedResource();
+ if (null == res) {
+ res = c.getAllocatedResource();
+ }
+
+ if (null != res) {
+ tq.deductActuallyToBePreempted(context.getResourceCalculator(),
+ tq.totalPartitionResource, res);
+ }
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoCandidatesSelector.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoCandidatesSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoCandidatesSelector.java
index 499d0ff..a8c62fd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoCandidatesSelector.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoCandidatesSelector.java
@@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.util.resource.Resources;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -55,7 +54,7 @@ public class FifoCandidatesSelector
super(preemptionContext);
preemptableAmountCalculator = new PreemptableResourceCalculator(
- preemptionContext);
+ preemptionContext, false);
}
@Override
@@ -66,8 +65,13 @@ public class FifoCandidatesSelector
preemptableAmountCalculator.computeIdealAllocation(clusterResource,
totalPreemptionAllowed);
- Map<ApplicationAttemptId, Set<RMContainer>> preemptMap =
- new HashMap<>();
+ // Previous selectors (with higher priority) could have already
+ // selected containers. We need to deduct preemptable resources
+ // based on already selected candidates.
+ CapacitySchedulerPreemptionUtils
+ .deductPreemptableResourcesBasedSelectedCandidates(preemptionContext,
+ selectedCandidates);
+
List<RMContainer> skippedAMContainerlist = new ArrayList<>();
// Loop all leaf queues
@@ -109,7 +113,7 @@ public class FifoCandidatesSelector
continue;
}
boolean preempted = tryPreemptContainerAndDeductResToObtain(
- resToObtainByPartition, c, clusterResource, preemptMap,
+ resToObtainByPartition, c, clusterResource, selectedCandidates,
totalPreemptionAllowed);
if (!preempted) {
continue;
@@ -132,7 +136,7 @@ public class FifoCandidatesSelector
}
preemptFrom(fc, clusterResource, resToObtainByPartition,
- skippedAMContainerlist, skippedAMSize, preemptMap,
+ skippedAMContainerlist, skippedAMSize, selectedCandidates,
totalPreemptionAllowed);
}
@@ -144,13 +148,13 @@ public class FifoCandidatesSelector
leafQueue.getAbsoluteCapacity()),
leafQueue.getMaxAMResourcePerQueuePercent());
- preemptAMContainers(clusterResource, preemptMap, skippedAMContainerlist,
+ preemptAMContainers(clusterResource, selectedCandidates, skippedAMContainerlist,
resToObtainByPartition, skippedAMSize, maxAMCapacityForThisQueue,
totalPreemptionAllowed);
}
}
- return preemptMap;
+ return selectedCandidates;
}
/**
@@ -236,9 +240,9 @@ public class FifoCandidatesSelector
resourceToObtainByPartitions.remove(nodePartition);
}
if (LOG.isDebugEnabled()) {
- LOG.debug("Marked container=" + rmContainer.getContainerId()
- + " in partition=" + nodePartition
- + " to be preemption candidates");
+ LOG.debug(this.getClass().getName() + " Marked container=" + rmContainer
+ .getContainerId() + " from partition=" + nodePartition + " queue="
+ + rmContainer.getQueueName() + " to be preemption candidates");
}
// Add to preemptMap
addToPreemptMap(preemptMap, attemptId, rmContainer);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/PreemptableResourceCalculator.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/PreemptableResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/PreemptableResourceCalculator.java
index 2217210..103b419 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/PreemptableResourceCalculator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/PreemptableResourceCalculator.java
@@ -37,7 +37,7 @@ import java.util.Set;
/**
* Calculate how much resources need to be preempted for each queue,
- * will be used by {@link FifoCandidatesSelector}
+ * will be used by {@link PreemptionCandidatesSelector}
*/
public class PreemptableResourceCalculator {
private static final Log LOG =
@@ -45,6 +45,7 @@ public class PreemptableResourceCalculator {
private final CapacitySchedulerPreemptionContext context;
private final ResourceCalculator rc;
+ private boolean isReservedPreemptionCandidatesSelector;
static class TQComparator implements Comparator<TempQueuePerPartition> {
private ResourceCalculator rc;
@@ -71,18 +72,31 @@ public class PreemptableResourceCalculator {
// capacity and therefore considered last for resources.
private double getIdealPctOfGuaranteed(TempQueuePerPartition q) {
double pctOver = Integer.MAX_VALUE;
- if (q != null && Resources.greaterThan(
- rc, clusterRes, q.guaranteed, Resources.none())) {
- pctOver =
- Resources.divide(rc, clusterRes, q.idealAssigned, q.guaranteed);
+ if (q != null && Resources.greaterThan(rc, clusterRes,
+ q.getGuaranteed(),
+ Resources.none())) {
+ pctOver = Resources.divide(rc, clusterRes, q.idealAssigned,
+ q.getGuaranteed());
}
return (pctOver);
}
}
- public PreemptableResourceCalculator(CapacitySchedulerPreemptionContext preemptionContext) {
+ /**
+ * PreemptableResourceCalculator constructor
+ *
+ * @param preemptionContext preemption policy context
+ * @param isReservedPreemptionCandidatesSelector this will be set by
+ * different implementation of candidate selectors, please refer to
+ * TempQueuePerPartition#offer for details.
+ */
+ public PreemptableResourceCalculator(
+ CapacitySchedulerPreemptionContext preemptionContext,
+ boolean isReservedPreemptionCandidatesSelector) {
context = preemptionContext;
rc = preemptionContext.getResourceCalculator();
+ this.isReservedPreemptionCandidatesSelector =
+ isReservedPreemptionCandidatesSelector;
}
/**
@@ -101,11 +115,11 @@ public class PreemptableResourceCalculator {
}
} else {
for (TempQueuePerPartition q : queues) {
- Resources.addTo(activeCap, q.guaranteed);
+ Resources.addTo(activeCap, q.getGuaranteed());
}
for (TempQueuePerPartition q : queues) {
q.normalizedGuarantee = Resources.divide(rc, clusterResource,
- q.guaranteed, activeCap);
+ q.getGuaranteed(), activeCap);
}
}
}
@@ -114,7 +128,8 @@ public class PreemptableResourceCalculator {
// return the list of all queues that have the same idealAssigned
// percentage of guaranteed.
protected Collection<TempQueuePerPartition> getMostUnderservedQueues(
- PriorityQueue<TempQueuePerPartition> orderedByNeed, TQComparator tqComparator) {
+ PriorityQueue<TempQueuePerPartition> orderedByNeed,
+ TQComparator tqComparator) {
ArrayList<TempQueuePerPartition> underserved = new ArrayList<>();
while (!orderedByNeed.isEmpty()) {
TempQueuePerPartition q1 = orderedByNeed.remove();
@@ -155,15 +170,19 @@ public class PreemptableResourceCalculator {
tqComparator);
for (Iterator<TempQueuePerPartition> i = qAlloc.iterator(); i.hasNext();) {
TempQueuePerPartition q = i.next();
- if (Resources.greaterThan(rc, tot_guarant, q.current, q.guaranteed)) {
- q.idealAssigned = Resources.add(q.guaranteed, q.untouchableExtra);
+ Resource used = q.getUsed();
+
+ if (Resources.greaterThan(rc, tot_guarant, used,
+ q.getGuaranteed())) {
+ q.idealAssigned = Resources.add(
+ q.getGuaranteed(), q.untouchableExtra);
} else {
- q.idealAssigned = Resources.clone(q.current);
+ q.idealAssigned = Resources.clone(used);
}
Resources.subtractFrom(unassigned, q.idealAssigned);
- // If idealAssigned < (current + pending), q needs more resources, so
+ // If idealAssigned < (allocated + used + pending), q needs more resources, so
// add it to the list of underserved queues, ordered by need.
- Resource curPlusPend = Resources.add(q.current, q.pending);
+ Resource curPlusPend = Resources.add(q.getUsed(), q.pending);
if (Resources.lessThan(rc, tot_guarant, q.idealAssigned, curPlusPend)) {
orderedByNeed.add(q);
}
@@ -190,7 +209,8 @@ public class PreemptableResourceCalculator {
TempQueuePerPartition sub = i.next();
Resource wQavail = Resources.multiplyAndNormalizeUp(rc,
unassigned, sub.normalizedGuarantee, Resource.newInstance(1, 1));
- Resource wQidle = sub.offer(wQavail, rc, tot_guarant);
+ Resource wQidle = sub.offer(wQavail, rc, tot_guarant,
+ isReservedPreemptionCandidatesSelector);
Resource wQdone = Resources.subtract(wQavail, wQidle);
if (Resources.greaterThan(rc, tot_guarant,
@@ -234,8 +254,8 @@ public class PreemptableResourceCalculator {
Set<TempQueuePerPartition> zeroGuarQueues = new HashSet<>();
for (TempQueuePerPartition q : qAlloc) {
- if (Resources
- .greaterThan(rc, tot_guarant, q.guaranteed, Resources.none())) {
+ if (Resources.greaterThan(rc, tot_guarant,
+ q.getGuaranteed(), Resources.none())) {
nonZeroGuarQueues.add(q);
} else {
zeroGuarQueues.add(q);
@@ -258,19 +278,22 @@ public class PreemptableResourceCalculator {
// how much preemption is required overall
Resource totPreemptionNeeded = Resource.newInstance(0, 0);
for (TempQueuePerPartition t:queues) {
- if (Resources.greaterThan(rc, tot_guarant, t.current, t.idealAssigned)) {
- Resources.addTo(totPreemptionNeeded,
- Resources.subtract(t.current, t.idealAssigned));
+ if (Resources.greaterThan(rc, tot_guarant,
+ t.getUsed(), t.idealAssigned)) {
+ Resources.addTo(totPreemptionNeeded, Resources
+ .subtract(t.getUsed(), t.idealAssigned));
}
}
- // if we need to preempt more than is allowed, compute a factor (0<f<1)
- // that is used to scale down how much we ask back from each queue
+ /**
+ * if we need to preempt more than is allowed, compute a factor (0<f<1)
+ * that is used to scale down how much we ask back from each queue
+ */
float scalingFactor = 1.0F;
- if (Resources.greaterThan(rc, tot_guarant,
- totPreemptionNeeded, totalPreemptionAllowed)) {
- scalingFactor = Resources.divide(rc, tot_guarant,
- totalPreemptionAllowed, totPreemptionNeeded);
+ if (Resources.greaterThan(rc,
+ tot_guarant, totPreemptionNeeded, totalPreemptionAllowed)) {
+ scalingFactor = Resources.divide(rc, tot_guarant, totalPreemptionAllowed,
+ totPreemptionNeeded);
}
// assign to each queue the amount of actual preemption based on local
@@ -278,12 +301,6 @@ public class PreemptableResourceCalculator {
for (TempQueuePerPartition t : queues) {
t.assignPreemption(scalingFactor, rc, tot_guarant);
}
- if (LOG.isDebugEnabled()) {
- for (TempQueuePerPartition t : queues) {
- LOG.debug(t);
- }
- }
-
}
/**
@@ -329,12 +346,31 @@ public class PreemptableResourceCalculator {
for (TempQueuePerPartition qT : context.getQueuePartitions(queueName)) {
// we act only if we are violating balance by more than
// maxIgnoredOverCapacity
- if (Resources.greaterThan(rc, clusterResource, qT.current,
- Resources.multiply(qT.guaranteed, 1.0 + context.getMaxIgnoreOverCapacity()))) {
- // we introduce a dampening factor naturalTerminationFactor that
- // accounts for natural termination of containers
- Resource resToObtain = Resources.multiply(qT.toBePreempted,
- context.getNaturalTerminationFactor());
+ if (Resources.greaterThan(rc, clusterResource,
+ qT.getUsed(), Resources
+ .multiply(qT.getGuaranteed(),
+ 1.0 + context.getMaxIgnoreOverCapacity()))) {
+ /*
+ * We introduce a dampening factor naturalTerminationFactor that
+ * accounts for natural termination of containers.
+ *
+ * This is added to control pace of preemption, let's say:
+ * If preemption policy calculated a queue *should be* preempted 20 GB
+ * And the nature_termination_factor set to 0.1. As a result, preemption
+ * policy will select 20 GB * 0.1 = 2GB containers to be preempted.
+ *
+ * However, it doesn't work for YARN-4390:
+ * For example, if a queue needs to be preempted 20GB for *one single*
+ * large container, preempt 10% of such resource isn't useful.
+ * So to make it simple, only apply nature_termination_factor when
+ * selector is not reservedPreemptionCandidatesSelector.
+ */
+ Resource resToObtain = qT.toBePreempted;
+ if (!isReservedPreemptionCandidatesSelector) {
+ resToObtain = Resources.multiply(qT.toBePreempted,
+ context.getNaturalTerminationFactor());
+ }
+
// Only add resToObtain when it >= 0
if (Resources.greaterThan(rc, clusterResource, resToObtain,
Resources.none())) {
@@ -343,22 +379,39 @@ public class PreemptableResourceCalculator {
+ " resource-to-obtain=" + resToObtain);
}
}
- qT.actuallyToBePreempted = Resources.clone(resToObtain);
+ qT.setActuallyToBePreempted(Resources.clone(resToObtain));
} else {
- qT.actuallyToBePreempted = Resources.none();
+ qT.setActuallyToBePreempted(Resources.none());
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(qT);
}
}
}
}
+ private void updatePreemptableExtras(TempQueuePerPartition cur) {
+ if (cur.children == null || cur.children.isEmpty()) {
+ cur.updatePreemptableExtras(rc);
+ } else {
+ for (TempQueuePerPartition child : cur.children) {
+ updatePreemptableExtras(child);
+ }
+ cur.updatePreemptableExtras(rc);
+ }
+ }
+
public void computeIdealAllocation(Resource clusterResource,
Resource totalPreemptionAllowed) {
for (String partition : context.getAllPartitions()) {
- TempQueuePerPartition tRoot =
- context.getQueueByPartition(CapacitySchedulerConfiguration.ROOT, partition);
+ TempQueuePerPartition tRoot = context.getQueueByPartition(
+ CapacitySchedulerConfiguration.ROOT, partition);
+ updatePreemptableExtras(tRoot);
+
// compute the ideal distribution of resources among queues
// updates cloned queues state accordingly
- tRoot.idealAssigned = tRoot.guaranteed;
+ tRoot.idealAssigned = tRoot.getGuaranteed();
recursivelyComputeIdealAssignment(tRoot, totalPreemptionAllowed);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
index 5de64b1..bf19f5a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java
@@ -34,7 +34,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResour
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptableQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerPreemptEvent;
@@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
import org.apache.hadoop.yarn.util.resource.Resources;
import java.io.IOException;
+import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -173,6 +174,15 @@ public class ProportionalCapacityPreemptionPolicy
rc = scheduler.getResourceCalculator();
nlm = scheduler.getRMContext().getNodeLabelManager();
+ // Do we need to specially consider reserved containers?
+ boolean selectCandidatesForResevedContainers = csConfig.getBoolean(
+ CapacitySchedulerConfiguration.PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS,
+ CapacitySchedulerConfiguration.DEFAULT_PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS);
+ if (selectCandidatesForResevedContainers) {
+ candidatesSelectionPolicies.add(
+ new ReservedContainerCandidatesSelector(this));
+ }
+
// initialize candidates preemption selection policies
candidatesSelectionPolicies.add(
new FifoCandidatesSelector(this));
@@ -185,9 +195,15 @@ public class ProportionalCapacityPreemptionPolicy
@Override
public void editSchedule() {
+ long startTs = clock.getTime();
+
CSQueue root = scheduler.getRootQueue();
Resource clusterResources = Resources.clone(scheduler.getClusterResource());
containerBasedPreemptOrKill(root, clusterResources);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Total time used=" + (clock.getTime() - startTs) + " ms.");
+ }
}
@SuppressWarnings("unchecked")
@@ -295,8 +311,8 @@ public class ProportionalCapacityPreemptionPolicy
queueToPartitions.clear();
for (String partitionToLookAt : allPartitions) {
- cloneQueues(root,
- nlm.getResourceByLabel(partitionToLookAt, clusterResources),
+ cloneQueues(root, Resources
+ .clone(nlm.getResourceByLabel(partitionToLookAt, clusterResources)),
partitionToLookAt);
}
}
@@ -311,9 +327,15 @@ public class ProportionalCapacityPreemptionPolicy
// based on ideal allocation select containers to be preemptionCandidates from each
// queue and each application
- Map<ApplicationAttemptId, Set<RMContainer>> toPreempt = null;
+ Map<ApplicationAttemptId, Set<RMContainer>> toPreempt =
+ new HashMap<>();
for (PreemptionCandidatesSelector selector :
candidatesSelectionPolicies) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(MessageFormat
+ .format("Trying to use {0} to select preemption candidates",
+ selector.getClass().getName()));
+ }
toPreempt = selector.selectCandidates(toPreempt,
clusterResources, totalPreemptionAllowed);
}
@@ -377,14 +399,15 @@ public class ProportionalCapacityPreemptionPolicy
float absMaxCap = qc.getAbsoluteMaximumCapacity(partitionToLookAt);
boolean preemptionDisabled = curQueue.getPreemptionDisabled();
- Resource current = curQueue.getQueueResourceUsage().getUsed(
- partitionToLookAt);
- Resource guaranteed = Resources.multiply(partitionResource, absCap);
- Resource maxCapacity = Resources.multiply(partitionResource, absMaxCap);
+ Resource current = Resources.clone(
+ curQueue.getQueueResourceUsage().getUsed(partitionToLookAt));
Resource killable = Resources.none();
+
+ Resource reserved = Resources.clone(
+ curQueue.getQueueResourceUsage().getReserved(partitionToLookAt));
if (null != preemptableQueues.get(queueName)) {
- killable = preemptableQueues.get(queueName)
- .getKillableResource(partitionToLookAt);
+ killable = Resources.clone(preemptableQueues.get(queueName)
+ .getKillableResource(partitionToLookAt));
}
// when partition is a non-exclusive partition, the actual maxCapacity
@@ -392,53 +415,24 @@ public class ProportionalCapacityPreemptionPolicy
try {
if (!scheduler.getRMContext().getNodeLabelManager()
.isExclusiveNodeLabel(partitionToLookAt)) {
- maxCapacity =
- Resources.max(rc, partitionResource, maxCapacity, current);
+ absMaxCap = 1.0f;
}
} catch (IOException e) {
// This may cause by partition removed when running capacity monitor,
// just ignore the error, this will be corrected when doing next check.
}
- Resource extra = Resource.newInstance(0, 0);
- if (Resources.greaterThan(rc, partitionResource, current, guaranteed)) {
- extra = Resources.subtract(current, guaranteed);
- }
- if (curQueue instanceof LeafQueue) {
- LeafQueue l = (LeafQueue) curQueue;
- Resource pending =
- l.getTotalPendingResourcesConsideringUserLimit(
- partitionResource, partitionToLookAt);
- ret = new TempQueuePerPartition(queueName, current, pending, guaranteed,
- maxCapacity, preemptionDisabled, partitionToLookAt, killable);
- if (preemptionDisabled) {
- ret.untouchableExtra = extra;
- } else {
- ret.preemptableExtra = extra;
- }
- ret.setLeafQueue(l);
- } else {
- Resource pending = Resource.newInstance(0, 0);
- ret =
- new TempQueuePerPartition(curQueue.getQueueName(), current, pending,
- guaranteed, maxCapacity, false, partitionToLookAt, killable);
- Resource childrensPreemptable = Resource.newInstance(0, 0);
+ ret = new TempQueuePerPartition(queueName, current, preemptionDisabled,
+ partitionToLookAt, killable, absCap, absMaxCap, partitionResource,
+ reserved, curQueue);
+
+ if (curQueue instanceof ParentQueue) {
+ // Recursively add children
for (CSQueue c : curQueue.getChildQueues()) {
- TempQueuePerPartition subq =
- cloneQueues(c, partitionResource, partitionToLookAt);
- Resources.addTo(childrensPreemptable, subq.preemptableExtra);
+ TempQueuePerPartition subq = cloneQueues(c, partitionResource,
+ partitionToLookAt);
ret.addChild(subq);
}
- // untouchableExtra = max(extra - childrenPreemptable, 0)
- if (Resources.greaterThanOrEqual(
- rc, partitionResource, childrensPreemptable, extra)) {
- ret.untouchableExtra = Resource.newInstance(0, 0);
- } else {
- ret.untouchableExtra =
- Resources.subtract(extra, childrensPreemptable);
- }
- ret.preemptableExtra = Resources.min(
- rc, partitionResource, childrensPreemptable, extra);
}
}
addTempQueuePartition(ret);
@@ -481,7 +475,8 @@ public class ProportionalCapacityPreemptionPolicy
String partition) {
Map<String, TempQueuePerPartition> partitionToQueues;
if (null == (partitionToQueues = queueToPartitions.get(queueName))) {
- return null;
+ throw new YarnRuntimeException("This shouldn't happen, cannot find "
+ + "TempQueuePerPartition for queueName=" + queueName);
}
return partitionToQueues.get(partition);
}
@@ -492,7 +487,8 @@ public class ProportionalCapacityPreemptionPolicy
@Override
public Collection<TempQueuePerPartition> getQueuePartitions(String queueName) {
if (!queueToPartitions.containsKey(queueName)) {
- return null;
+ throw new YarnRuntimeException("This shouldn't happen, cannot find "
+ + "TempQueuePerPartition collection for queueName=" + queueName);
}
return queueToPartitions.get(queueName).values();
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java
new file mode 100644
index 0000000..3900a35
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java
@@ -0,0 +1,316 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
+import org.apache.hadoop.yarn.util.resource.Resources;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class ReservedContainerCandidatesSelector
+ extends PreemptionCandidatesSelector {
+ private static final Log LOG =
+ LogFactory.getLog(ReservedContainerCandidatesSelector.class);
+
+ private PreemptableResourceCalculator preemptableAmountCalculator;
+
+ /**
+ * A temporary data structure to remember what to preempt on a node
+ */
+ private static class NodeForPreemption {
+ private float preemptionCost;
+ private FiCaSchedulerNode schedulerNode;
+ private List<RMContainer> selectedContainers;
+
+ public NodeForPreemption(float preemptionCost,
+ FiCaSchedulerNode schedulerNode, List<RMContainer> selectedContainers) {
+ this.preemptionCost = preemptionCost;
+ this.schedulerNode = schedulerNode;
+ this.selectedContainers = selectedContainers;
+ }
+ }
+
+ ReservedContainerCandidatesSelector(
+ CapacitySchedulerPreemptionContext preemptionContext) {
+ super(preemptionContext);
+ preemptableAmountCalculator = new PreemptableResourceCalculator(
+ preemptionContext, true);
+ }
+
+ @Override
+ public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(
+ Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
+ Resource clusterResource,
+ Resource totalPreemptedResourceAllowed) {
+ // Calculate how much resources we need to preempt
+ preemptableAmountCalculator.computeIdealAllocation(clusterResource,
+ totalPreemptedResourceAllowed);
+
+ // Get queue to preemptable resource by partition
+ Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition =
+ new HashMap<>();
+ for (String leafQueue : preemptionContext.getLeafQueueNames()) {
+ queueToPreemptableResourceByPartition.put(leafQueue,
+ CapacitySchedulerPreemptionUtils
+ .getResToObtainByPartitionForLeafQueue(preemptionContext,
+ leafQueue, clusterResource));
+ }
+
+ // Get list of nodes for preemption, ordered by preemption cost
+ List<NodeForPreemption> nodesForPreemption = getNodesForPreemption(
+ clusterResource, queueToPreemptableResourceByPartition,
+ selectedCandidates, totalPreemptedResourceAllowed);
+
+ for (NodeForPreemption nfp : nodesForPreemption) {
+ RMContainer reservedContainer = nfp.schedulerNode.getReservedContainer();
+ if (null == reservedContainer) {
+ continue;
+ }
+
+ NodeForPreemption preemptionResult = getPreemptionCandidatesOnNode(
+ nfp.schedulerNode, clusterResource,
+ queueToPreemptableResourceByPartition, selectedCandidates,
+ totalPreemptedResourceAllowed, false);
+ if (null != preemptionResult) {
+ for (RMContainer c : preemptionResult.selectedContainers) {
+ ApplicationAttemptId appId = c.getApplicationAttemptId();
+ Set<RMContainer> containers = selectedCandidates.get(appId);
+ if (null == containers) {
+ containers = new HashSet<>();
+ selectedCandidates.put(appId, containers);
+ }
+
+ containers.add(c);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(this.getClass().getName() + " Marked container=" + c
+ .getContainerId() + " from queue=" + c.getQueueName()
+ + " to be preemption candidates");
+ }
+ }
+ }
+ }
+
+ return selectedCandidates;
+ }
+
+ private Resource getPreemptableResource(String queueName,
+ String partitionName,
+ Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition) {
+ Map<String, Resource> partitionToPreemptable =
+ queueToPreemptableResourceByPartition.get(queueName);
+ if (null == partitionToPreemptable) {
+ return null;
+ }
+
+ Resource preemptable = partitionToPreemptable.get(partitionName);
+ return preemptable;
+ }
+
+ private boolean tryToPreemptFromQueue(Resource cluster, String queueName,
+ String partitionName,
+ Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition,
+ Resource required, Resource totalPreemptionAllowed, boolean readOnly) {
+ Resource preemptable = getPreemptableResource(queueName, partitionName,
+ queueToPreemptableResourceByPartition);
+ if (null == preemptable) {
+ return false;
+ }
+
+ if (!Resources.fitsIn(rc, cluster, required, preemptable)) {
+ return false;
+ }
+
+ if (!Resources.fitsIn(rc, cluster, required, totalPreemptionAllowed)) {
+ return false;
+ }
+
+ if (!readOnly) {
+ Resources.subtractFrom(preemptable, required);
+ Resources.subtractFrom(totalPreemptionAllowed, required);
+ }
+ return true;
+ }
+
+
+
+ /**
+ * Check whether enough containers can be preempted on the given node to satisfy its reserved container
+ * @param node the node hosting the reserved container
+ * @param cluster total cluster resource
+ * @param queueToPreemptableResourceByPartition it's a map of
+ * <queueName, <partition, preemptable-resource>>
+ * @param readOnly when true, only simulate the selection; do not deduct
+ * preemptable resources or preemption quotas
+ * @return NodeForPreemption if it's possible to preempt containers on the node
+ * to satisfy reserved resource
+ */
+ private NodeForPreemption getPreemptionCandidatesOnNode(
+ FiCaSchedulerNode node, Resource cluster,
+ Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition,
+ Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
+ Resource totalPreemptionAllowed, boolean readOnly) {
+ RMContainer reservedContainer = node.getReservedContainer();
+ Resource available = Resources.clone(node.getAvailableResource());
+ Resource totalSelected = Resources.createResource(0);
+ List<RMContainer> sortedRunningContainers =
+ node.getCopiedListOfRunningContainers();
+ List<RMContainer> selectedContainers = new ArrayList<>();
+ Map<ContainerId, RMContainer> killableContainers =
+ node.getKillableContainers();
+
+ // Sort running containers by launch time; we prefer to preempt recently
+ // launched containers
+ Collections.sort(sortedRunningContainers, new Comparator<RMContainer>() {
+ @Override public int compare(RMContainer o1, RMContainer o2) {
+ return -1 * o1.getContainerId().compareTo(o2.getContainerId());
+ }
+ });
+
+ // First check: can we preempt containers to allocate the
+ // reservedContainer?
+ boolean canAllocateReservedContainer = false;
+
+ // At least, we can get available + killable resources from this node
+ Resource cur = Resources.add(available, node.getTotalKillableResources());
+ String partition = node.getPartition();
+
+ // Avoid preempting any containers if required <= available + killable
+ if (Resources.fitsIn(rc, cluster, reservedContainer.getReservedResource(),
+ cur)) {
+ return null;
+ }
+
+ // Extra cost of am container preemption
+ float amPreemptionCost = 0f;
+
+ for (RMContainer c : sortedRunningContainers) {
+ String containerQueueName = c.getQueueName();
+
+ // Skip container if it is already marked killable
+ if (killableContainers.containsKey(c.getContainerId())) {
+ continue;
+ }
+
+ // An alternative approach is to add a "penalty cost" if an AM container is
+ // selected. Here, for safety, avoid preempting AM containers in all cases
+ if (c.isAMContainer()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Skip selecting AM container on host=" + node.getNodeID()
+ + " AM container=" + c.getContainerId());
+ }
+ continue;
+ }
+
+ // Can we preempt container c?
+ // Check if we have quota to preempt this container
+ boolean canPreempt = tryToPreemptFromQueue(cluster, containerQueueName,
+ partition, queueToPreemptableResourceByPartition,
+ c.getAllocatedResource(), totalPreemptionAllowed, readOnly);
+
+ // If we can, add to selected container, and change resource accordingly.
+ if (canPreempt) {
+ if (!CapacitySchedulerPreemptionUtils.isContainerAlreadySelected(c,
+ selectedCandidates)) {
+ if (!readOnly) {
+ selectedContainers.add(c);
+ }
+ Resources.addTo(totalSelected, c.getAllocatedResource());
+ }
+ Resources.addTo(cur, c.getAllocatedResource());
+ if (Resources.fitsIn(rc, cluster,
+ reservedContainer.getReservedResource(), cur)) {
+ canAllocateReservedContainer = true;
+ break;
+ }
+ }
+ }
+
+ if (!canAllocateReservedContainer) {
+ if (!readOnly) {
+ // Revert queue preemption quotas
+ for (RMContainer c : selectedContainers) {
+ Resource res = getPreemptableResource(c.getQueueName(), partition,
+ queueToPreemptableResourceByPartition);
+ if (null == res) {
+ // This shouldn't happen in normal cases, one possible cause is
+ // container moved to different queue while executing preemption logic.
+ // Ignore such failures.
+ continue;
+ }
+ Resources.addTo(res, c.getAllocatedResource());
+ }
+ }
+ return null;
+ }
+
+ float ratio = Resources.ratio(rc, totalSelected,
+ reservedContainer.getReservedResource());
+
+ // Compute preemption score
+ NodeForPreemption nfp = new NodeForPreemption(ratio + amPreemptionCost,
+ node, selectedContainers);
+
+ return nfp;
+ }
+
+ private List<NodeForPreemption> getNodesForPreemption(Resource cluster,
+ Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition,
+ Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
+ Resource totalPreemptionAllowed) {
+ List<NodeForPreemption> nfps = new ArrayList<>();
+
+ // get nodes have reserved container
+ for (FiCaSchedulerNode node : preemptionContext.getScheduler()
+ .getAllNodes().values()) {
+ if (node.getReservedContainer() != null) {
+ NodeForPreemption nfp = getPreemptionCandidatesOnNode(node, cluster,
+ queueToPreemptableResourceByPartition, selectedCandidates,
+ totalPreemptionAllowed, true);
+ if (null != nfp) {
+ // A non-null result means containers on the node can be preempted to
+ // satisfy the reserved container
+ nfps.add(nfp);
+ }
+ }
+ }
+
+ // Return sorted node-for-preemptions (by cost)
+ Collections.sort(nfps, new Comparator<NodeForPreemption>() {
+ @Override
+ public int compare(NodeForPreemption o1, NodeForPreemption o2) {
+ return Float.compare(o1.preemptionCost, o2.preemptionCost);
+ }
+ });
+
+ return nfps;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java
index 8b01a73..04ed135 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
import org.apache.hadoop.yarn.util.resource.Resources;
@@ -32,20 +33,22 @@ import java.util.ArrayList;
public class TempQueuePerPartition {
// Following fields are copied from scheduler
final String queueName;
- final Resource current;
- final Resource pending;
- final Resource guaranteed;
- final Resource maxCapacity;
- final Resource killable;
final String partition;
+ final Resource pending;
+
+ private final Resource current;
+ private final Resource killable;
+ private final Resource reserved;
+ private final float absCapacity;
+ private final float absMaxCapacity;
+ final Resource totalPartitionResource;
// Following fields are setted and used by candidate selection policies
Resource idealAssigned;
Resource toBePreempted;
Resource untouchableExtra;
Resource preemptableExtra;
- // For logging purpose
- Resource actuallyToBePreempted;
+ private Resource actuallyToBePreempted;
double normalizedGuarantee;
@@ -53,14 +56,22 @@ public class TempQueuePerPartition {
LeafQueue leafQueue;
boolean preemptionDisabled;
- TempQueuePerPartition(String queueName, Resource current, Resource pending,
- Resource guaranteed, Resource maxCapacity, boolean preemptionDisabled,
- String partition, Resource killable) {
+ TempQueuePerPartition(String queueName, Resource current,
+ boolean preemptionDisabled, String partition, Resource killable,
+ float absCapacity, float absMaxCapacity, Resource totalPartitionResource,
+ Resource reserved, CSQueue queue) {
this.queueName = queueName;
this.current = current;
- this.pending = pending;
- this.guaranteed = guaranteed;
- this.maxCapacity = maxCapacity;
+
+ if (queue instanceof LeafQueue) {
+ LeafQueue l = (LeafQueue) queue;
+ pending = l.getTotalPendingResourcesConsideringUserLimit(
+ totalPartitionResource, partition);
+ leafQueue = l;
+ } else {
+ pending = Resources.createResource(0);
+ }
+
this.idealAssigned = Resource.newInstance(0, 0);
this.actuallyToBePreempted = Resource.newInstance(0, 0);
this.toBePreempted = Resource.newInstance(0, 0);
@@ -71,6 +82,10 @@ public class TempQueuePerPartition {
this.preemptionDisabled = preemptionDisabled;
this.partition = partition;
this.killable = killable;
+ this.absCapacity = absCapacity;
+ this.absMaxCapacity = absMaxCapacity;
+ this.totalPartitionResource = totalPartitionResource;
+ this.reserved = reserved;
}
public void setLeafQueue(LeafQueue l) {
@@ -92,31 +107,101 @@ public class TempQueuePerPartition {
return children;
}
+ public Resource getUsed() {
+ return current;
+ }
+
+ public Resource getUsedDeductReservd() {
+ return Resources.subtract(current, reserved);
+ }
+
// This function "accepts" all the resources it can (pending) and return
// the unused ones
Resource offer(Resource avail, ResourceCalculator rc,
- Resource clusterResource) {
+ Resource clusterResource, boolean considersReservedResource) {
Resource absMaxCapIdealAssignedDelta = Resources.componentwiseMax(
- Resources.subtract(maxCapacity, idealAssigned),
+ Resources.subtract(getMax(), idealAssigned),
Resource.newInstance(0, 0));
// remain = avail - min(avail, (max - assigned), (current + pending - assigned))
- Resource accepted =
- Resources.min(rc, clusterResource,
- absMaxCapIdealAssignedDelta,
- Resources.min(rc, clusterResource, avail, Resources.subtract(
- Resources.add(current, pending), idealAssigned)));
+ Resource accepted = Resources.min(rc, clusterResource,
+ absMaxCapIdealAssignedDelta, Resources.min(rc, clusterResource, avail,
+ Resources
+ /*
+ * When we're using FifoPreemptionSelector
+ * (considerReservedResource = false).
+ *
+ * We should deduct reserved resource to avoid excessive preemption:
+ *
+ * For example, if an under-utilized queue has used = reserved = 20.
+ * Preemption policy will try to preempt 20 containers
+ * (which is not satisfied) from different hosts.
+ *
+ * In FifoPreemptionSelector, there's no guarantee that preempted
+ * resource can be used by pending request, so policy will preempt
+ * resources repeatedly.
+ */
+ .subtract(Resources.add(
+ (considersReservedResource ? getUsed() :
+ getUsedDeductReservd()),
+ pending), idealAssigned)));
Resource remain = Resources.subtract(avail, accepted);
Resources.addTo(idealAssigned, accepted);
return remain;
}
+ public Resource getGuaranteed() {
+ return Resources.multiply(totalPartitionResource, absCapacity);
+ }
+
+ public Resource getMax() {
+ return Resources.multiply(totalPartitionResource, absMaxCapacity);
+ }
+
+ public void updatePreemptableExtras(ResourceCalculator rc) {
+ // Reset untouchableExtra and preemptableExtra
+ untouchableExtra = Resources.none();
+ preemptableExtra = Resources.none();
+
+ Resource extra = Resources.subtract(getUsed(),
+ getGuaranteed());
+ if (Resources.lessThan(rc, totalPartitionResource, extra,
+ Resources.none())) {
+ extra = Resources.none();
+ }
+
+ if (null == children || children.isEmpty()) {
+ // If it is a leaf queue
+ if (preemptionDisabled) {
+ untouchableExtra = extra;
+ } else {
+ preemptableExtra = extra;
+ }
+ } else {
+ // If it is a parent queue
+ Resource childrensPreemptable = Resource.newInstance(0, 0);
+ for (TempQueuePerPartition child : children) {
+ Resources.addTo(childrensPreemptable, child.preemptableExtra);
+ }
+ // untouchableExtra = max(extra - childrenPreemptable, 0)
+ if (Resources.greaterThanOrEqual(rc, totalPartitionResource,
+ childrensPreemptable, extra)) {
+ untouchableExtra = Resource.newInstance(0, 0);
+ } else {
+ untouchableExtra = Resources.subtract(extra, childrensPreemptable);
+ }
+ preemptableExtra = Resources.min(rc, totalPartitionResource,
+ childrensPreemptable, extra);
+ }
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append(" NAME: " + queueName)
.append(" CUR: ").append(current)
.append(" PEN: ").append(pending)
- .append(" GAR: ").append(guaranteed)
+ .append(" RESERVED: ").append(reserved)
+ .append(" GAR: ").append(getGuaranteed())
.append(" NORM: ").append(normalizedGuarantee)
.append(" IDEAL_ASSIGNED: ").append(idealAssigned)
.append(" IDEAL_PREEMPT: ").append(toBePreempted)
@@ -130,29 +215,60 @@ public class TempQueuePerPartition {
public void assignPreemption(float scalingFactor, ResourceCalculator rc,
Resource clusterResource) {
- if (Resources.greaterThan(rc, clusterResource,
- Resources.subtract(current, killable), idealAssigned)) {
- toBePreempted = Resources.multiply(Resources
- .subtract(Resources.subtract(current, killable), idealAssigned),
- scalingFactor);
+ Resource usedDeductKillable = Resources.subtract(
+ getUsed(), killable);
+ Resource totalResource = Resources.add(getUsed(), pending);
+
+ // The minimum resource that we need to keep for a queue is:
+ // max(idealAssigned, min(used + pending, guaranteed)).
+ //
+ // Doing this because when we calculate ideal allocation doesn't consider
+ // reserved resource, ideal-allocation calculated could be less than
+ // guaranteed and total. We should avoid preempt from a queue if it is already
+ // <= its guaranteed resource.
+ Resource minimumQueueResource = Resources.max(rc, clusterResource,
+ Resources.min(rc, clusterResource, totalResource, getGuaranteed()),
+ idealAssigned);
+
+ if (Resources.greaterThan(rc, clusterResource, usedDeductKillable,
+ minimumQueueResource)) {
+ toBePreempted = Resources.multiply(
+ Resources.subtract(usedDeductKillable, minimumQueueResource), scalingFactor);
} else {
- toBePreempted = Resource.newInstance(0, 0);
+ toBePreempted = Resources.none();
+ }
+ }
+
+ public Resource getActuallyToBePreempted() {
+ return actuallyToBePreempted;
+ }
+
+ public void setActuallyToBePreempted(Resource res) {
+ this.actuallyToBePreempted = res;
+ }
+
+ public void deductActuallyToBePreempted(ResourceCalculator rc,
+ Resource cluster, Resource toBeDeduct) {
+ if (Resources.greaterThan(rc, cluster, actuallyToBePreempted, toBeDeduct)) {
+ Resources.subtractFrom(actuallyToBePreempted, toBeDeduct);
}
+ actuallyToBePreempted = Resources.max(rc, cluster, actuallyToBePreempted,
+ Resources.none());
}
void appendLogString(StringBuilder sb) {
sb.append(queueName).append(", ")
- .append(current.getMemory()).append(", ")
+ .append(current.getMemorySize()).append(", ")
.append(current.getVirtualCores()).append(", ")
- .append(pending.getMemory()).append(", ")
+ .append(pending.getMemorySize()).append(", ")
.append(pending.getVirtualCores()).append(", ")
- .append(guaranteed.getMemory()).append(", ")
- .append(guaranteed.getVirtualCores()).append(", ")
- .append(idealAssigned.getMemory()).append(", ")
+ .append(getGuaranteed().getMemorySize()).append(", ")
+ .append(getGuaranteed().getVirtualCores()).append(", ")
+ .append(idealAssigned.getMemorySize()).append(", ")
.append(idealAssigned.getVirtualCores()).append(", ")
- .append(toBePreempted.getMemory()).append(", ")
+ .append(toBePreempted.getMemorySize()).append(", ")
.append(toBePreempted.getVirtualCores() ).append(", ")
- .append(actuallyToBePreempted.getMemory()).append(", ")
+ .append(actuallyToBePreempted.getMemorySize()).append(", ")
.append(actuallyToBePreempted.getVirtualCores());
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java
index dfe0886..f37923f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java
@@ -89,4 +89,6 @@ public interface RMContainer extends EventHandler<RMContainerEvent> {
boolean hasIncreaseReservation();
void cancelIncreaseReservation();
+
+ String getQueueName();
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
index 2d0126b..44589aa 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
@@ -181,6 +181,7 @@ public class RMContainerImpl implements RMContainer, Comparable<RMContainer> {
// Only used for container resource increase and decrease. This is the
// resource to rollback to should container resource increase token expires.
private Resource lastConfirmedResource;
+ private volatile String queueName;
public RMContainerImpl(Container container,
ApplicationAttemptId appAttemptId, NodeId nodeId, String user,
@@ -817,4 +818,13 @@ public class RMContainerImpl implements RMContainer, Comparable<RMContainer> {
public void cancelIncreaseReservation() {
hasIncreaseReservation = false;
}
+
+ public void setQueueName(String queueName) {
+ this.queueName = queueName;
+ }
+
+ @Override
+ public String getQueueName() {
+ return queueName;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
index 864c3a8..47c83bc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
@@ -687,7 +687,7 @@ public abstract class AbstractYarnScheduler
updateMaximumAllocation(node, false);
// update resource to node
- node.setTotalResource(newResource);
+ node.updateTotalResource(newResource);
nodes.put(nm.getNodeID(), (N)node);
updateMaximumAllocation(node, true);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
index 17c60d9..13519a2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
@@ -403,6 +403,7 @@ public class SchedulerApplicationAttempt implements SchedulableEntity {
node.getNodeID(), appSchedulingInfo.getUser(), rmContext);
attemptResourceUsage.incReserved(node.getPartition(),
container.getResource());
+ ((RMContainerImpl)rmContainer).setQueueName(this.getQueueName());
// Reset the re-reservation count
resetReReservations(priority);
@@ -750,14 +751,17 @@ public class SchedulerApplicationAttempt implements SchedulableEntity {
public synchronized void move(Queue newQueue) {
QueueMetrics oldMetrics = queue.getMetrics();
QueueMetrics newMetrics = newQueue.getMetrics();
+ String newQueueName = newQueue.getQueueName();
String user = getUser();
for (RMContainer liveContainer : liveContainers.values()) {
Resource resource = liveContainer.getContainer().getResource();
+ ((RMContainerImpl)liveContainer).setQueueName(newQueueName);
oldMetrics.releaseResources(user, 1, resource);
newMetrics.allocateResources(user, 1, resource, false);
}
for (Map<NodeId, RMContainer> map : reservedContainers.values()) {
for (RMContainer reservedContainer : map.values()) {
+ ((RMContainerImpl)reservedContainer).setQueueName(newQueueName);
Resource resource = reservedContainer.getReservedResource();
oldMetrics.unreserveResource(user, resource);
newMetrics.reserveResource(user, resource);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
index 81ddd11..8a3fd8b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
@@ -98,12 +98,12 @@ public abstract class SchedulerNode {
* Set total resources on the node.
* @param resource total resources on the node.
*/
- public synchronized void setTotalResource(Resource resource){
+ public synchronized void updateTotalResource(Resource resource){
this.totalResourceCapability = resource;
this.availableResource = Resources.subtract(totalResourceCapability,
this.usedResource);
}
-
+
/**
* Get the ID of the node which contains both its hostname and port.
*
@@ -176,7 +176,7 @@ public abstract class SchedulerNode {
+ " containers, " + getUsedResource() + " used and "
+ getAvailableResource() + " available after allocation");
}
-
+
/**
* The Scheduler increased container
*/
@@ -184,7 +184,7 @@ public abstract class SchedulerNode {
Resource deltaResource) {
changeContainerResource(containerId, deltaResource, true);
}
-
+
/**
* The Scheduler decreased container
*/
@@ -227,7 +227,8 @@ public abstract class SchedulerNode {
return false;
}
- protected synchronized void updateResource(Container container) {
+ protected synchronized void updateResourceForReleasedContainer(
+ Container container) {
addAvailableResource(container.getResource());
--numContainers;
}
@@ -246,7 +247,7 @@ public abstract class SchedulerNode {
/* remove the containers from the nodemanger */
if (null != launchedContainers.remove(container.getId())) {
- updateResource(container);
+ updateResourceForReleasedContainer(container);
}
LOG.info("Released container " + container.getId() + " of capacity "
@@ -303,7 +304,7 @@ public abstract class SchedulerNode {
return numContainers;
}
- public synchronized List<RMContainer> getRunningContainers() {
+ public synchronized List<RMContainer> getCopiedListOfRunningContainers() {
return new ArrayList<RMContainer>(launchedContainers.values());
}
@@ -312,7 +313,7 @@ public abstract class SchedulerNode {
}
protected synchronized void
- setReservedContainer(RMContainer reservedContainer) {
+ setReservedContainer(RMContainer reservedContainer) {
this.reservedContainer = reservedContainer;
}
@@ -330,7 +331,7 @@ public abstract class SchedulerNode {
public void updateLabels(Set<String> labels) {
this.labels = labels;
}
-
+
/**
* Get partition of which the node belongs to, if node-labels of this node is
* empty or null, it belongs to NO_LABEL partition. And since we only support
@@ -338,7 +339,7 @@ public abstract class SchedulerNode {
*/
public String getPartition() {
if (this.labels == null || this.labels.isEmpty()) {
- return RMNodeLabelsManager.NO_LABEL;
+ return RMNodeLabelsManager.NO_LABEL;
} else {
return this.labels.iterator().next();
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
index 5153018..ca4a2fd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
@@ -35,7 +35,6 @@ import org.apache.hadoop.util.Time;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
@@ -1150,7 +1149,7 @@ public class CapacityScheduler extends
String oldPartition = node.getPartition();
// Update resources of these containers
- for (RMContainer rmContainer : node.getRunningContainers()) {
+ for (RMContainer rmContainer : node.getCopiedListOfRunningContainers()) {
FiCaSchedulerApp application =
getApplicationAttempt(rmContainer.getApplicationAttemptId());
if (null != application) {
@@ -1506,7 +1505,7 @@ public class CapacityScheduler extends
}
// Remove running containers
- List<RMContainer> runningContainers = node.getRunningContainers();
+ List<RMContainer> runningContainers = node.getCopiedListOfRunningContainers();
for (RMContainer container : runningContainers) {
super.completedContainer(container,
SchedulerUtils.createAbnormalContainerStatus(
@@ -1623,9 +1622,9 @@ public class CapacityScheduler extends
public FiCaSchedulerNode getNode(NodeId nodeId) {
return nodes.get(nodeId);
}
-
+
@Lock(Lock.NoLock.class)
- Map<NodeId, FiCaSchedulerNode> getAllNodes() {
+ public Map<NodeId, FiCaSchedulerNode> getAllNodes() {
return nodes;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
index ab3aa7d..6db5074 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
@@ -1042,21 +1042,24 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
return getBoolean(LAZY_PREEMPTION_ENALBED, DEFAULT_LAZY_PREEMPTION_ENABLED);
}
+ private static final String PREEMPTION_CONFIG_PREFIX =
+ "yarn.resourcemanager.monitor.capacity.preemption.";
+
/** If true, run the policy but do not affect the cluster with preemption and
* kill events. */
public static final String PREEMPTION_OBSERVE_ONLY =
- "yarn.resourcemanager.monitor.capacity.preemption.observe_only";
+ PREEMPTION_CONFIG_PREFIX + "observe_only";
public static final boolean DEFAULT_PREEMPTION_OBSERVE_ONLY = false;
/** Time in milliseconds between invocations of this policy */
public static final String PREEMPTION_MONITORING_INTERVAL =
- "yarn.resourcemanager.monitor.capacity.preemption.monitoring_interval";
+ PREEMPTION_CONFIG_PREFIX + "monitoring_interval";
public static final long DEFAULT_PREEMPTION_MONITORING_INTERVAL = 3000L;
/** Time in milliseconds between requesting a preemption from an application
* and killing the container. */
public static final String PREEMPTION_WAIT_TIME_BEFORE_KILL =
- "yarn.resourcemanager.monitor.capacity.preemption.max_wait_before_kill";
+ PREEMPTION_CONFIG_PREFIX + "max_wait_before_kill";
public static final long DEFAULT_PREEMPTION_WAIT_TIME_BEFORE_KILL = 15000L;
/** Maximum percentage of resources preemptionCandidates in a single round. By
@@ -1064,7 +1067,7 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
* reclaimed from the cluster. After computing the total desired preemption,
* the policy scales it back within this limit. */
public static final String TOTAL_PREEMPTION_PER_ROUND =
- "yarn.resourcemanager.monitor.capacity.preemption.total_preemption_per_round";
+ PREEMPTION_CONFIG_PREFIX + "total_preemption_per_round";
public static final float DEFAULT_TOTAL_PREEMPTION_PER_ROUND = 0.1f;
/** Maximum amount of resources above the target capacity ignored for
@@ -1073,7 +1076,7 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
* High values would slow the time to capacity and (absent natural
* completions) it might prevent convergence to guaranteed capacity. */
public static final String PREEMPTION_MAX_IGNORED_OVER_CAPACITY =
- "yarn.resourcemanager.monitor.capacity.preemption.max_ignored_over_capacity";
+ PREEMPTION_CONFIG_PREFIX + "max_ignored_over_capacity";
public static final float DEFAULT_PREEMPTION_MAX_IGNORED_OVER_CAPACITY = 0.1f;
/**
* Given a computed preemption target, account for containers naturally
@@ -1083,7 +1086,16 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
* will reclaim almost 95% of resources within 5 * {@link
* #PREEMPTION_WAIT_TIME_BEFORE_KILL}, even absent natural termination. */
public static final String PREEMPTION_NATURAL_TERMINATION_FACTOR =
- "yarn.resourcemanager.monitor.capacity.preemption.natural_termination_factor";
+ PREEMPTION_CONFIG_PREFIX + "natural_termination_factor";
public static final float DEFAULT_PREEMPTION_NATURAL_TERMINATION_FACTOR =
0.2f;
+
+ /**
+ * When calculating which containers to be preempted, we will try to preempt
+ * containers for reserved containers first. By default it is false.
+ */
+ public static final String PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS =
+ PREEMPTION_CONFIG_PREFIX + "select_based_on_reserved_containers";
+ public static final boolean DEFAULT_PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS =
+ false;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
index 595b3fe..38995ff 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
@@ -201,6 +201,7 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt {
new RMContainerImpl(container, this.getApplicationAttemptId(),
node.getNodeID(), appSchedulingInfo.getUser(), this.rmContext,
request.getNodeLabelExpression());
+ ((RMContainerImpl)rmContainer).setQueueName(this.getQueueName());
updateAMContainerDiagnostics(AMState.ASSIGNED, null);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java
index 1d0e78a..f90a53c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicat
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.util.resource.Resources;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
@@ -142,9 +143,9 @@ public class FiCaSchedulerNode extends SchedulerNode {
}
@Override
- protected synchronized void updateResource(
+ protected synchronized void updateResourceForReleasedContainer(
Container container) {
- super.updateResource(container);
+ super.updateResourceForReleasedContainer(container);
if (killableContainers.containsKey(container.getId())) {
Resources.subtractFrom(totalKillableResources, container.getResource());
killableContainers.remove(container.getId());
@@ -170,6 +171,6 @@ public class FiCaSchedulerNode extends SchedulerNode {
}
public synchronized Map<ContainerId, RMContainer> getKillableContainers() {
- return killableContainers;
+ return Collections.unmodifiableMap(killableContainers);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
index 4b21f7f..24dcd06 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java
@@ -379,6 +379,7 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
RMContainer rmContainer = new RMContainerImpl(container,
getApplicationAttemptId(), node.getNodeID(),
appSchedulingInfo.getUser(), rmContext);
+ ((RMContainerImpl)rmContainer).setQueueName(this.getQueueName());
// Add it to allContainers list.
newlyAllocatedContainers.add(rmContainer);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
index d6bc1fd..960339e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
@@ -916,7 +916,8 @@ public class FairScheduler extends
triggerUpdate();
// Remove running containers
- List<RMContainer> runningContainers = node.getRunningContainers();
+ List<RMContainer> runningContainers =
+ node.getCopiedListOfRunningContainers();
for (RMContainer container : runningContainers) {
super.completedContainer(container,
SchedulerUtils.createAbnormalContainerStatus(
http://git-wip-us.apache.org/repos/asf/hadoop/blob/40367c8d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
index 409a043..664bd4e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
@@ -937,7 +937,7 @@ public class FifoScheduler extends
return;
}
// Kill running containers
- for(RMContainer container : node.getRunningContainers()) {
+ for(RMContainer container : node.getCopiedListOfRunningContainers()) {
super.completedContainer(container,
SchedulerUtils.createAbnormalContainerStatus(
container.getContainerId(),
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org