You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by ka...@apache.org on 2014/08/08 16:40:46 UTC
svn commit: r1616785 - in
/hadoop/common/branches/branch-2/hadoop-yarn-project: ./
hadoop-yarn/dev-support/
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/
ha...
Author: kasha
Date: Fri Aug 8 14:40:46 2014
New Revision: 1616785
URL: http://svn.apache.org/r1616785
Log:
YARN-2352. FairScheduler: Collect metrics on duration of critical methods that affect performance. (kasha)
Added:
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSOpDurations.java
- copied unchanged from r1616784, hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSOpDurations.java
Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1616785&r1=1616784&r2=1616785&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Fri Aug 8 14:40:46 2014
@@ -76,6 +76,9 @@ Release 2.6.0 - UNRELEASED
YARN-2288. Made persisted data in LevelDB timeline store be versioned. (Junping Du
via zjshen)
+ YARN-2352. FairScheduler: Collect metrics on duration of critical methods that
+ affect performance. (kasha)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml?rev=1616785&r1=1616784&r2=1616785&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml Fri Aug 8 14:40:46 2014
@@ -200,6 +200,13 @@
<Field name="updateInterval" />
<Bug pattern="IS2_INCONSISTENT_SYNC" />
</Match>
+ <!-- Inconsistent sync warning - fsOpDurations is only initialized once and never changed -->
+ <Match>
+ <Class name="org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler" />
+ <Field name="fsOpDurations" />
+ <Bug pattern="IS2_INCONSISTENT_SYNC" />
+ </Match>
+
<!-- Inconsistent sync warning - numRetries is only initialized once and never changed -->
<Match>
<Class name="org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore" />
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java?rev=1616785&r1=1616784&r2=1616785&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java Fri Aug 8 14:40:46 2014
@@ -149,6 +149,7 @@ public class FairScheduler extends
// Aggregate metrics
FSQueueMetrics rootMetrics;
+ FSOpDurations fsOpDurations;
// Time when we last updated preemption vars
protected long lastPreemptionUpdateTime;
@@ -256,8 +257,11 @@ public class FairScheduler extends
while (!Thread.currentThread().isInterrupted()) {
try {
Thread.sleep(updateInterval);
+ long start = getClock().getTime();
update();
preemptTasksIfNecessary();
+ long duration = getClock().getTime() - start;
+ fsOpDurations.addUpdateThreadRunDuration(duration);
} catch (InterruptedException ie) {
LOG.warn("Update thread interrupted. Exiting.");
return;
@@ -294,6 +298,7 @@ public class FairScheduler extends
* required resources per job.
*/
protected synchronized void update() {
+ long start = getClock().getTime();
updatePreemptionVariables(); // Determine if any queues merit preemption
FSQueue rootQueue = queueMgr.getRootQueue();
@@ -317,6 +322,9 @@ public class FairScheduler extends
" Demand: " + rootQueue.getDemand());
}
}
+
+ long duration = getClock().getTime() - start;
+ fsOpDurations.addUpdateCallDuration(duration);
}
/**
@@ -325,7 +333,7 @@ public class FairScheduler extends
* for each type of task.
*/
private void updatePreemptionVariables() {
- long now = clock.getTime();
+ long now = getClock().getTime();
lastPreemptionUpdateTime = now;
for (FSLeafQueue sched : queueMgr.getLeafQueues()) {
if (!isStarvedForMinShare(sched)) {
@@ -352,7 +360,8 @@ public class FairScheduler extends
* defined as being below half its fair share.
*/
boolean isStarvedForFairShare(FSLeafQueue sched) {
- Resource desiredFairShare = Resources.min(RESOURCE_CALCULATOR, clusterResource,
+ Resource desiredFairShare = Resources.min(RESOURCE_CALCULATOR,
+ clusterResource,
Resources.multiply(sched.getFairShare(), .5), sched.getDemand());
return Resources.lessThan(RESOURCE_CALCULATOR, clusterResource,
sched.getResourceUsage(), desiredFairShare);
@@ -370,7 +379,7 @@ public class FairScheduler extends
return;
}
- long curTime = clock.getTime();
+ long curTime = getClock().getTime();
if (curTime - lastPreemptCheckTime < preemptionInterval) {
return;
}
@@ -398,6 +407,7 @@ public class FairScheduler extends
* We make sure that no queue is placed below its fair share in the process.
*/
protected void preemptResources(Resource toPreempt) {
+ long start = getClock().getTime();
if (Resources.equals(toPreempt, Resources.none())) {
return;
}
@@ -448,6 +458,9 @@ public class FairScheduler extends
}
}
}
+
+ long duration = getClock().getTime() - start;
+ fsOpDurations.addPreemptCallDuration(duration);
}
protected void warnOrKillContainer(RMContainer container) {
@@ -463,7 +476,7 @@ public class FairScheduler extends
if (time != null) {
// if we asked for preemption more than maxWaitTimeBeforeKill ms ago,
// proceed with kill
- if (time + waitTimeBeforeKill < clock.getTime()) {
+ if (time + waitTimeBeforeKill < getClock().getTime()) {
ContainerStatus status =
SchedulerUtils.createPreemptedContainerStatus(
container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER);
@@ -474,11 +487,11 @@ public class FairScheduler extends
completedContainer(container, status, RMContainerEventType.KILL);
LOG.info("Killing container" + container +
" (after waiting for premption for " +
- (clock.getTime() - time) + "ms)");
+ (getClock().getTime() - time) + "ms)");
}
} else {
// track the request in the FSSchedulerApp itself
- app.addPreemption(container, clock.getTime());
+ app.addPreemption(container, getClock().getTime());
}
}
@@ -659,7 +672,7 @@ public class FairScheduler extends
rmContext);
if (transferStateFromPreviousAttempt) {
attempt.transferStateFromPreviousAttempt(application
- .getCurrentAppAttempt());
+ .getCurrentAppAttempt());
}
application.setCurrentAppAttempt(attempt);
@@ -960,6 +973,7 @@ public class FairScheduler extends
* Process a heartbeat update from a node.
*/
private synchronized void nodeUpdate(RMNode nm) {
+ long start = getClock().getTime();
if (LOG.isDebugEnabled()) {
LOG.debug("nodeUpdate: " + nm + " cluster capacity: " + clusterResource);
}
@@ -996,9 +1010,13 @@ public class FairScheduler extends
} else {
attemptScheduling(node);
}
+
+ long duration = getClock().getTime() - start;
+ fsOpDurations.addNodeUpdateDuration(duration);
}
void continuousSchedulingAttempt() throws InterruptedException {
+ long start = getClock().getTime();
List<NodeId> nodeIdList = new ArrayList<NodeId>(nodes.keySet());
// Sort the nodes by space available on them, so that we offer
// containers on emptier nodes first, facilitating an even spread. This
@@ -1021,6 +1039,9 @@ public class FairScheduler extends
": " + ex.toString(), ex);
}
}
+
+ long duration = getClock().getTime() - start;
+ fsOpDurations.addContinuousSchedulingRunDuration(duration);
}
/** Sort nodes by available resource */
@@ -1244,6 +1265,8 @@ public class FairScheduler extends
}
rootMetrics = FSQueueMetrics.forQueue("root", null, true, conf);
+ fsOpDurations = FSOpDurations.getInstance(true);
+
// This stores per-application scheduling information
this.applications =
new ConcurrentHashMap<ApplicationId,SchedulerApplication<FSSchedulerApp>>();
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java?rev=1616785&r1=1616784&r2=1616785&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java Fri Aug 8 14:40:46 2014
@@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
@@ -3366,4 +3367,14 @@ public class TestFairScheduler extends F
assertNotEquals("One of the threads is still alive", 0, numRetries);
}
+
+ @Test
+ public void testPerfMetricsInited() {
+ scheduler.init(conf);
+ scheduler.start();
+ MetricsCollectorImpl collector = new MetricsCollectorImpl();
+ scheduler.fsOpDurations.getMetrics(collector, true);
+ assertEquals("Incorrect number of perf metrics", 1,
+ collector.getRecords().size());
+ }
}