You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ka...@apache.org on 2015/03/05 02:37:20 UTC
hadoop git commit: YARN-3122. Metrics for container's actual CPU
usage. (Anubhav Dhoot via kasha)
Repository: hadoop
Updated Branches:
refs/heads/trunk 722b47946 -> 53947f37c
YARN-3122. Metrics for container's actual CPU usage. (Anubhav Dhoot via kasha)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/53947f37
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/53947f37
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/53947f37
Branch: refs/heads/trunk
Commit: 53947f37c7a84a84ef4ab1a3cab63ff27c078385
Parents: 722b479
Author: Karthik Kambatla <ka...@apache.org>
Authored: Wed Mar 4 17:33:30 2015 -0800
Committer: Karthik Kambatla <ka...@apache.org>
Committed: Wed Mar 4 17:33:30 2015 -0800
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 +
.../apache/hadoop/yarn/util/CpuTimeTracker.java | 99 ++++++++++++++++++++
.../util/LinuxResourceCalculatorPlugin.java | 46 +++------
.../yarn/util/ProcfsBasedProcessTree.java | 77 ++++++++++++++-
.../util/ResourceCalculatorProcessTree.java | 12 ++-
.../yarn/util/WindowsBasedProcessTree.java | 7 +-
.../util/TestLinuxResourceCalculatorPlugin.java | 4 +-
.../yarn/util/TestProcfsBasedProcessTree.java | 38 ++++++--
.../util/TestResourceCalculatorProcessTree.java | 5 +
.../monitor/ContainerMetrics.java | 39 ++++++--
.../monitor/ContainersMonitorImpl.java | 18 ++++
.../util/NodeManagerHardwareUtils.java | 16 +++-
12 files changed, 311 insertions(+), 53 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 03bb20b..0b71bee 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -351,6 +351,9 @@ Release 2.7.0 - UNRELEASED
YARN-3272. Surface container locality info in RM web UI.
(Jian He via wangda)
+ YARN-3122. Metrics for container's actual CPU usage.
+ (Anubhav Dhoot via kasha)
+
OPTIMIZATIONS
YARN-2990. FairScheduler's delay-scheduling always waits for node-local and
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/CpuTimeTracker.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/CpuTimeTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/CpuTimeTracker.java
new file mode 100644
index 0000000..d36848e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/CpuTimeTracker.java
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.util;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import java.math.BigInteger;
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class CpuTimeTracker {
+ public static final int UNAVAILABLE = -1;
+ final long MINIMUM_UPDATE_INTERVAL;
+
+ // CPU used time since system is on (ms)
+ BigInteger cumulativeCpuTime = BigInteger.ZERO;
+
+ // CPU used time read last time (ms)
+ BigInteger lastCumulativeCpuTime = BigInteger.ZERO;
+
+ // Unix timestamp while reading the CPU time (ms)
+ long sampleTime;
+ long lastSampleTime;
+ float cpuUsage;
+ BigInteger jiffyLengthInMillis;
+
+ public CpuTimeTracker(long jiffyLengthInMillis) {
+ this.jiffyLengthInMillis = BigInteger.valueOf(jiffyLengthInMillis);
+ this.cpuUsage = UNAVAILABLE;
+ this.sampleTime = UNAVAILABLE;
+ this.lastSampleTime = UNAVAILABLE;
+ MINIMUM_UPDATE_INTERVAL = 10 * jiffyLengthInMillis;
+ }
+
+ /**
+ * Return percentage of cpu time spent over the time since last update.
+ * CPU time spent is based on elapsed jiffies multiplied by amount of
+ * time for 1 core. Thus, if you use 2 cores completely you would have spent
+ * twice the actual time between updates and this will return 200%.
+ *
+ * @return Return percentage of cpu usage since last update, {@link
+ * CpuTimeTracker#UNAVAILABLE} if there haven't been 2 updates more than
+ * {@link CpuTimeTracker#MINIMUM_UPDATE_INTERVAL} apart
+ */
+ public float getCpuTrackerUsagePercent() {
+ if (lastSampleTime == UNAVAILABLE ||
+ lastSampleTime > sampleTime) {
+ // lastSampleTime > sampleTime may happen when the system time is changed
+ lastSampleTime = sampleTime;
+ lastCumulativeCpuTime = cumulativeCpuTime;
+ return cpuUsage;
+ }
+ // When lastSampleTime is sufficiently old, update cpuUsage.
+ // Also take a sample of the current time and cumulative CPU time for the
+ // use of the next calculation.
+ if (sampleTime > lastSampleTime + MINIMUM_UPDATE_INTERVAL) {
+ cpuUsage =
+ ((cumulativeCpuTime.subtract(lastCumulativeCpuTime)).floatValue())
+ * 100F / ((float) (sampleTime - lastSampleTime));
+ lastSampleTime = sampleTime;
+ lastCumulativeCpuTime = cumulativeCpuTime;
+ }
+ return cpuUsage;
+ }
+
+ public void updateElapsedJiffies(BigInteger elapedJiffies, long sampleTime) {
+ this.cumulativeCpuTime = elapedJiffies.multiply(jiffyLengthInMillis);
+ this.sampleTime = sampleTime;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("SampleTime " + this.sampleTime);
+ sb.append(" CummulativeCpuTime " + this.cumulativeCpuTime);
+ sb.append(" LastSampleTime " + this.lastSampleTime);
+ sb.append(" LastCummulativeCpuTime " + this.lastCumulativeCpuTime);
+ sb.append(" CpuUsage " + this.cpuUsage);
+ sb.append(" JiffyLengthMillisec " + this.jiffyLengthInMillis);
+ return sb.toString();
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/LinuxResourceCalculatorPlugin.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/LinuxResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/LinuxResourceCalculatorPlugin.java
index 2347f40..ab1511a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/LinuxResourceCalculatorPlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/LinuxResourceCalculatorPlugin.java
@@ -23,6 +23,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.io.IOException;
+import java.math.BigInteger;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -41,8 +42,6 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin {
private static final Log LOG =
LogFactory.getLog(LinuxResourceCalculatorPlugin.class);
- public static final int UNAVAILABLE = -1;
-
/**
* proc's meminfo virtual file has keys-values in the format
* "key:[ \t]*value[ \t]kB".
@@ -74,6 +73,7 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin {
private static final Pattern CPU_TIME_FORMAT =
Pattern.compile("^cpu[ \t]*([0-9]*)" +
"[ \t]*([0-9]*)[ \t]*([0-9]*)[ \t].*");
+ private CpuTimeTracker cpuTimeTracker;
private String procfsMemFile;
private String procfsCpuFile;
@@ -87,12 +87,6 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin {
private long inactiveSize = 0; // inactive cache memory (kB)
private int numProcessors = 0; // number of processors on the system
private long cpuFrequency = 0L; // CPU frequency on the system (kHz)
- private long cumulativeCpuTime = 0L; // CPU used time since system is on (ms)
- private long lastCumulativeCpuTime = 0L; // CPU used time read last time (ms)
- // Unix timestamp while reading the CPU time (ms)
- private float cpuUsage = UNAVAILABLE;
- private long sampleTime = UNAVAILABLE;
- private long lastSampleTime = UNAVAILABLE;
boolean readMemInfoFile = false;
boolean readCpuInfoFile = false;
@@ -106,10 +100,8 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin {
}
public LinuxResourceCalculatorPlugin() {
- procfsMemFile = PROCFS_MEMFILE;
- procfsCpuFile = PROCFS_CPUINFO;
- procfsStatFile = PROCFS_STAT;
- jiffyLengthInMillis = ProcfsBasedProcessTree.JIFFY_LENGTH_IN_MILLIS;
+ this(PROCFS_MEMFILE, PROCFS_CPUINFO, PROCFS_STAT,
+ ProcfsBasedProcessTree.JIFFY_LENGTH_IN_MILLIS);
}
/**
@@ -128,6 +120,7 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin {
this.procfsCpuFile = procfsCpuFile;
this.procfsStatFile = procfsStatFile;
this.jiffyLengthInMillis = jiffyLengthInMillis;
+ this.cpuTimeTracker = new CpuTimeTracker(jiffyLengthInMillis);
}
/**
@@ -276,12 +269,13 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin {
long uTime = Long.parseLong(mat.group(1));
long nTime = Long.parseLong(mat.group(2));
long sTime = Long.parseLong(mat.group(3));
- cumulativeCpuTime = uTime + nTime + sTime; // milliseconds
+ cpuTimeTracker.updateElapsedJiffies(
+ BigInteger.valueOf(uTime + nTime + sTime),
+ getCurrentTime());
break;
}
str = in.readLine();
}
- cumulativeCpuTime *= jiffyLengthInMillis;
} catch (IOException io) {
LOG.warn("Error reading the stream " + io);
} finally {
@@ -345,32 +339,18 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin {
@Override
public long getCumulativeCpuTime() {
readProcStatFile();
- return cumulativeCpuTime;
+ return cpuTimeTracker.cumulativeCpuTime.longValue();
}
/** {@inheritDoc} */
@Override
public float getCpuUsage() {
readProcStatFile();
- sampleTime = getCurrentTime();
- if (lastSampleTime == UNAVAILABLE ||
- lastSampleTime > sampleTime) {
- // lastSampleTime > sampleTime may happen when the system time is changed
- lastSampleTime = sampleTime;
- lastCumulativeCpuTime = cumulativeCpuTime;
- return cpuUsage;
- }
- // When lastSampleTime is sufficiently old, update cpuUsage.
- // Also take a sample of the current time and cumulative CPU time for the
- // use of the next calculation.
- final long MINIMUM_UPDATE_INTERVAL = 10 * jiffyLengthInMillis;
- if (sampleTime > lastSampleTime + MINIMUM_UPDATE_INTERVAL) {
- cpuUsage = (float)(cumulativeCpuTime - lastCumulativeCpuTime) * 100F /
- ((float)(sampleTime - lastSampleTime) * getNumProcessors());
- lastSampleTime = sampleTime;
- lastCumulativeCpuTime = cumulativeCpuTime;
+ float overallCpuUsage = cpuTimeTracker.getCpuTrackerUsagePercent();
+ if (overallCpuUsage != CpuTimeTracker.UNAVAILABLE) {
+ overallCpuUsage = overallCpuUsage / getNumProcessors();
}
- return cpuUsage;
+ return overallCpuUsage;
}
/**
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
index 69aa96d..134cec2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
@@ -66,6 +66,8 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
public static final String PROCFS_CMDLINE_FILE = "cmdline";
public static final long PAGE_SIZE;
public static final long JIFFY_LENGTH_IN_MILLIS; // in millisecond
+ private final CpuTimeTracker cpuTimeTracker;
+ private Clock clock;
enum MemInfo {
SIZE("Size"), RSS("Rss"), PSS("Pss"), SHARED_CLEAN("Shared_Clean"),
@@ -144,7 +146,7 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
new HashMap<String, ProcessInfo>();
public ProcfsBasedProcessTree(String pid) {
- this(pid, PROCFS);
+ this(pid, PROCFS, new SystemClock());
}
@Override
@@ -157,6 +159,10 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
}
}
+ public ProcfsBasedProcessTree(String pid, String procfsDir) {
+ this(pid, procfsDir, new SystemClock());
+ }
+
/**
* Build a new process tree rooted at the pid.
*
@@ -165,11 +171,14 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
*
* @param pid root of the process tree
* @param procfsDir the root of a proc file system - only used for testing.
+ * @param clock clock for controlling time for testing
*/
- public ProcfsBasedProcessTree(String pid, String procfsDir) {
+ public ProcfsBasedProcessTree(String pid, String procfsDir, Clock clock) {
super(pid);
+ this.clock = clock;
this.pid = getValidPID(pid);
this.procfsDir = procfsDir;
+ this.cpuTimeTracker = new CpuTimeTracker(JIFFY_LENGTH_IN_MILLIS);
}
/**
@@ -447,6 +456,26 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
return cpuTime;
}
+ private BigInteger getTotalProcessJiffies() {
+ BigInteger totalStime = BigInteger.ZERO;
+ long totalUtime = 0;
+ for (ProcessInfo p : processTree.values()) {
+ if (p != null) {
+ totalUtime += p.getUtime();
+ totalStime = totalStime.add(p.getStime());
+ }
+ }
+ return totalStime.add(BigInteger.valueOf(totalUtime));
+ }
+
+ @Override
+ public float getCpuUsagePercent() {
+ BigInteger processTotalJiffies = getTotalProcessJiffies();
+ cpuTimeTracker.updateElapsedJiffies(processTotalJiffies,
+ clock.getTime());
+ return cpuTimeTracker.getCpuTrackerUsagePercent();
+ }
+
private static String getValidPID(String pid) {
if (pid == null) return deadPid;
Matcher m = numberPattern.matcher(pid);
@@ -962,4 +991,48 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
return sb.toString();
}
}
+
+ /**
+ * Test the {@link ProcfsBasedProcessTree}
+ *
+ * @param args
+ */
+ public static void main(String[] args) {
+ if (args.length != 1) {
+ System.out.println("Provide <pid of process to monitor>");
+ return;
+ }
+
+ int numprocessors =
+ ResourceCalculatorPlugin.getResourceCalculatorPlugin(null, null)
+ .getNumProcessors();
+ System.out.println("Number of processors " + numprocessors);
+
+ System.out.println("Creating ProcfsBasedProcessTree for process " +
+ args[0]);
+ ProcfsBasedProcessTree procfsBasedProcessTree = new
+ ProcfsBasedProcessTree(args[0]);
+ procfsBasedProcessTree.updateProcessTree();
+
+ System.out.println(procfsBasedProcessTree.getProcessTreeDump());
+ System.out.println("Get cpu usage " + procfsBasedProcessTree
+ .getCpuUsagePercent());
+
+ try {
+ // Sleep so we can compute the CPU usage
+ Thread.sleep(500L);
+ } catch (InterruptedException e) {
+ // do nothing
+ }
+
+ procfsBasedProcessTree.updateProcessTree();
+
+ System.out.println(procfsBasedProcessTree.getProcessTreeDump());
+ System.out.println("Cpu usage " + procfsBasedProcessTree
+ .getCpuUsagePercent());
+ System.out.println("Vmem usage in bytes " + procfsBasedProcessTree
+ .getCumulativeVmem());
+ System.out.println("Rss mem usage in bytes " + procfsBasedProcessTree
+ .getCumulativeRssmem());
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
index 85f6f1a..8c22c9e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
@@ -108,13 +108,23 @@ public abstract class ResourceCalculatorProcessTree extends Configured {
/**
* Get the CPU time in millisecond used by all the processes in the
- * process-tree since the process-tree created
+ * process-tree since the process-tree was created
*
* @return cumulative CPU time in millisecond since the process-tree created
* return 0 if it cannot be calculated
*/
public abstract long getCumulativeCpuTime();
+ /**
+ * Get the CPU usage by all the processes in the process-tree based on
+ * average between samples as a ratio of overall CPU cycles similar to top.
+ * Thus, if 2 out of 4 cores are used this should return 200.0.
+ *
+ * @return percentage CPU usage since the process-tree was created
+ * return {@link CpuTimeTracker#UNAVAILABLE} if it cannot be calculated
+ */
+ public abstract float getCpuUsagePercent();
+
/** Verify that the tree process id is same as its process group id.
* @return true if the process id matches else return false.
*/
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java
index 143d236..5c3251f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/WindowsBasedProcessTree.java
@@ -34,7 +34,7 @@ public class WindowsBasedProcessTree extends ResourceCalculatorProcessTree {
static final Log LOG = LogFactory
.getLog(WindowsBasedProcessTree.class);
-
+
static class ProcessInfo {
String pid; // process pid
long vmem; // virtual memory
@@ -202,4 +202,9 @@ public class WindowsBasedProcessTree extends ResourceCalculatorProcessTree {
return cpuTimeMs;
}
+ @Override
+ public float getCpuUsagePercent() {
+ return CpuTimeTracker.UNAVAILABLE;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestLinuxResourceCalculatorPlugin.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestLinuxResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestLinuxResourceCalculatorPlugin.java
index c9a33d0..ad09fdf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestLinuxResourceCalculatorPlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestLinuxResourceCalculatorPlugin.java
@@ -171,8 +171,8 @@ public class TestLinuxResourceCalculatorPlugin {
updateStatFile(uTime, nTime, sTime);
assertEquals(plugin.getCumulativeCpuTime(),
FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
- assertEquals(plugin.getCpuUsage(), (float)(LinuxResourceCalculatorPlugin.UNAVAILABLE),0.0);
-
+ assertEquals(plugin.getCpuUsage(), (float)(CpuTimeTracker.UNAVAILABLE),0.0);
+
// Advance the time and sample again to test the CPU usage calculation
uTime += 100L;
plugin.advanceTime(200L);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java
index 7719255..d62e21d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestProcfsBasedProcessTree.java
@@ -236,8 +236,8 @@ public class TestProcfsBasedProcessTree {
}
protected ProcfsBasedProcessTree createProcessTree(String pid,
- String procfsRootDir) {
- return new ProcfsBasedProcessTree(pid, procfsRootDir);
+ String procfsRootDir, Clock clock) {
+ return new ProcfsBasedProcessTree(pid, procfsRootDir, clock);
}
protected void destroyProcessTree(String pid) throws IOException {
@@ -388,6 +388,8 @@ public class TestProcfsBasedProcessTree {
// test processes
String[] pids = { "100", "200", "300", "400" };
+ ControlledClock testClock = new ControlledClock(new SystemClock());
+ testClock.setTime(0);
// create the fake procfs root directory.
File procfsRootDir = new File(TEST_ROOT_DIR, "proc");
@@ -422,7 +424,7 @@ public class TestProcfsBasedProcessTree {
// crank up the process tree class.
Configuration conf = new Configuration();
ProcfsBasedProcessTree processTree =
- createProcessTree("100", procfsRootDir.getAbsolutePath());
+ createProcessTree("100", procfsRootDir.getAbsolutePath(), testClock);
processTree.setConf(conf);
// build the process tree.
processTree.updateProcessTree();
@@ -444,6 +446,12 @@ public class TestProcfsBasedProcessTree {
? 7200L * ProcfsBasedProcessTree.JIFFY_LENGTH_IN_MILLIS : 0L;
Assert.assertEquals("Cumulative cpu time does not match", cumuCpuTime,
processTree.getCumulativeCpuTime());
+
+ // verify CPU usage
+ Assert.assertEquals("Percent CPU time should be set to -1 initially",
+ -1.0, processTree.getCpuUsagePercent(),
+ 0.01);
+
// Check by enabling smaps
setSmapsInProceTree(processTree, true);
// RSS=Min(shared_dirty,PSS)+PrivateClean+PrivateDirty (exclude r-xs,
@@ -460,15 +468,31 @@ public class TestProcfsBasedProcessTree {
"100", "200000", "200", "3000", "500" });
writeStatFiles(procfsRootDir, pids, procInfos, memInfo);
+ long elapsedTimeBetweenUpdatesMsec = 200000;
+ testClock.setTime(elapsedTimeBetweenUpdatesMsec);
// build the process tree.
processTree.updateProcessTree();
// verify cumulative cpu time again
+ long prevCumuCpuTime = cumuCpuTime;
cumuCpuTime =
ProcfsBasedProcessTree.JIFFY_LENGTH_IN_MILLIS > 0
? 9400L * ProcfsBasedProcessTree.JIFFY_LENGTH_IN_MILLIS : 0L;
Assert.assertEquals("Cumulative cpu time does not match", cumuCpuTime,
processTree.getCumulativeCpuTime());
+
+ double expectedCpuUsagePercent =
+ (ProcfsBasedProcessTree.JIFFY_LENGTH_IN_MILLIS > 0) ?
+ (cumuCpuTime - prevCumuCpuTime) * 100.0 /
+ elapsedTimeBetweenUpdatesMsec : 0;
+ // expectedCpuUsagePercent is given by (94000L - 72000) * 100/
+ // 200000;
+ // which in this case is 11. Lets verify that first
+ Assert.assertEquals(11, expectedCpuUsagePercent, 0.001);
+ Assert.assertEquals("Percent CPU time is not correct expected " +
+ expectedCpuUsagePercent, expectedCpuUsagePercent,
+ processTree.getCpuUsagePercent(),
+ 0.01);
} finally {
FileUtil.fullyDelete(procfsRootDir);
}
@@ -535,7 +559,8 @@ public class TestProcfsBasedProcessTree {
// crank up the process tree class.
ProcfsBasedProcessTree processTree =
- createProcessTree("100", procfsRootDir.getAbsolutePath());
+ createProcessTree("100", procfsRootDir.getAbsolutePath(),
+ new SystemClock());
setSmapsInProceTree(processTree, smapEnabled);
// verify cumulative memory
@@ -672,7 +697,7 @@ public class TestProcfsBasedProcessTree {
setupProcfsRootDir(procfsRootDir);
// crank up the process tree class.
- createProcessTree(pid, procfsRootDir.getAbsolutePath());
+ createProcessTree(pid, procfsRootDir.getAbsolutePath(), new SystemClock());
// Let us not create stat file for pid 100.
Assert.assertTrue(ProcfsBasedProcessTree.checkPidPgrpidForMatch(pid,
@@ -741,7 +766,8 @@ public class TestProcfsBasedProcessTree {
writeCmdLineFiles(procfsRootDir, pids, cmdLines);
ProcfsBasedProcessTree processTree =
- createProcessTree("100", procfsRootDir.getAbsolutePath());
+ createProcessTree("100", procfsRootDir.getAbsolutePath(),
+ new SystemClock());
// build the process tree.
processTree.updateProcessTree();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestResourceCalculatorProcessTree.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestResourceCalculatorProcessTree.java
index 32ceb23..eaf7e8e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestResourceCalculatorProcessTree.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestResourceCalculatorProcessTree.java
@@ -53,6 +53,11 @@ public class TestResourceCalculatorProcessTree {
return 0;
}
+ @Override
+ public float getCpuUsagePercent() {
+ return CpuTimeTracker.UNAVAILABLE;
+ }
+
public boolean checkPidPgrpidForMatch() {
return false;
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
index 7850688..1375da8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
@@ -42,14 +42,29 @@ import static org.apache.hadoop.metrics2.lib.Interns.info;
@Metrics(context="container")
public class ContainerMetrics implements MetricsSource {
- public static final String PMEM_LIMIT_METRIC_NAME = "pMemLimit";
- public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimit";
+ public static final String PMEM_LIMIT_METRIC_NAME = "pMemLimitMBs";
+ public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs";
public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit";
- public static final String PMEM_USAGE_METRIC_NAME = "pMemUsage";
+ public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs";
+ private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent";
+
+ // Use a multiplier of 1000 to avoid losing too much precision when
+ // converting to integers
+ private static final String VCORE_USAGE_METRIC_NAME = "milliVcoreUsage";
@Metric
public MutableStat pMemMBsStat;
+ // This tracks overall CPU percentage of the machine in terms of percentage
+ // of 1 core similar to top
+ // Thus if you use 2 cores completely out of 4 available cores this value
+ // will be 200
+ @Metric
+ public MutableStat cpuCoreUsagePercent;
+
+ @Metric
+ public MutableStat milliVcoresUsed;
+
@Metric
public MutableGaugeInt pMemLimitMbs;
@@ -57,7 +72,7 @@ public class ContainerMetrics implements MetricsSource {
public MutableGaugeInt vMemLimitMbs;
@Metric
- public MutableGaugeInt cpuVcores;
+ public MutableGaugeInt cpuVcoreLimit;
static final MetricsInfo RECORD_INFO =
info("ContainerResource", "Resource limit and usage by container");
@@ -95,11 +110,17 @@ public class ContainerMetrics implements MetricsSource {
this.pMemMBsStat = registry.newStat(
PMEM_USAGE_METRIC_NAME, "Physical memory stats", "Usage", "MBs", true);
+ this.cpuCoreUsagePercent = registry.newStat(
+ PHY_CPU_USAGE_METRIC_NAME, "Physical Cpu core percent usage stats",
+ "Usage", "Percents", true);
+ this.milliVcoresUsed = registry.newStat(
+ VCORE_USAGE_METRIC_NAME, "1000 times Vcore usage", "Usage",
+ "MilliVcores", true);
this.pMemLimitMbs = registry.newGauge(
PMEM_LIMIT_METRIC_NAME, "Physical memory limit in MBs", 0);
this.vMemLimitMbs = registry.newGauge(
VMEM_LIMIT_METRIC_NAME, "Virtual memory limit in MBs", 0);
- this.cpuVcores = registry.newGauge(
+ this.cpuVcoreLimit = registry.newGauge(
VCORE_LIMIT_METRIC_NAME, "CPU limit in number of vcores", 0);
}
@@ -170,6 +191,12 @@ public class ContainerMetrics implements MetricsSource {
this.pMemMBsStat.add(memoryMBs);
}
+ public void recordCpuUsage(
+ int totalPhysicalCpuPercent, int milliVcoresUsed) {
+ this.cpuCoreUsagePercent.add(totalPhysicalCpuPercent);
+ this.milliVcoresUsed.add(milliVcoresUsed);
+ }
+
public void recordProcessId(String processId) {
registry.tag(PROCESSID_INFO, processId);
}
@@ -177,7 +204,7 @@ public class ContainerMetrics implements MetricsSource {
public void recordResourceLimit(int vmemLimit, int pmemLimit, int cpuVcores) {
this.vMemLimitMbs.set(vmemLimit);
this.pMemLimitMbs.set(pmemLimit);
- this.cpuVcores.set(cpuVcores);
+ this.cpuVcoreLimit.set(cpuVcores);
}
private synchronized void scheduleTimerTaskIfRequired() {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index 2cecda6..b587e46 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent;
+import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils;
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
@@ -75,6 +76,7 @@ public class ContainersMonitorImpl extends AbstractService implements
private long maxVCoresAllottedForContainers;
private static final long UNKNOWN_MEMORY_LIMIT = -1L;
+ private int nodeCpuPercentageForYARN;
public ContainersMonitorImpl(ContainerExecutor exec,
AsyncDispatcher dispatcher, Context context) {
@@ -145,6 +147,9 @@ public class ContainersMonitorImpl extends AbstractService implements
LOG.info("Physical memory check enabled: " + pmemCheckEnabled);
LOG.info("Virtual memory check enabled: " + vmemCheckEnabled);
+ nodeCpuPercentageForYARN =
+ NodeManagerHardwareUtils.getNodeCpuPercentage(conf);
+
if (pmemCheckEnabled) {
// Logging if actual pmem cannot be determined.
long totalPhysicalMemoryOnNM = UNKNOWN_MEMORY_LIMIT;
@@ -434,6 +439,16 @@ public class ContainersMonitorImpl extends AbstractService implements
pTree.updateProcessTree(); // update process-tree
long currentVmemUsage = pTree.getCumulativeVmem();
long currentPmemUsage = pTree.getCumulativeRssmem();
+ // if machine has 6 cores and 3 are used,
+ // cpuUsagePercentPerCore should be 300% and
+ // cpuUsageTotalCoresPercentage should be 50%
+ float cpuUsagePercentPerCore = pTree.getCpuUsagePercent();
+ float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore /
+ resourceCalculatorPlugin.getNumProcessors();
+
+ // Multiply by 1000 to avoid losing data when converting to int
+ int milliVcoresUsed = (int) (cpuUsageTotalCoresPercentage * 1000
+ * maxVCoresAllottedForContainers /nodeCpuPercentageForYARN);
// as processes begin with an age 1, we want to see if there
// are processes more than 1 iteration old.
long curMemUsageOfAgedProcesses = pTree.getCumulativeVmem(1);
@@ -451,6 +466,9 @@ public class ContainersMonitorImpl extends AbstractService implements
ContainerMetrics.forContainer(
containerId, containerMetricsPeriodMs).recordMemoryUsage(
(int) (currentPmemUsage >> 20));
+ ContainerMetrics.forContainer(
+ containerId, containerMetricsPeriodMs).recordCpuUsage
+ ((int)cpuUsagePercentPerCore, milliVcoresUsed);
}
boolean isMemoryOverLimit = false;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/53947f37/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java
index 07cf698..431cf5d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java
@@ -59,6 +59,19 @@ public class NodeManagerHardwareUtils {
public static float getContainersCores(ResourceCalculatorPlugin plugin,
Configuration conf) {
int numProcessors = plugin.getNumProcessors();
+ int nodeCpuPercentage = getNodeCpuPercentage(conf);
+
+ return (nodeCpuPercentage * numProcessors) / 100.0f;
+ }
+
+ /**
+ * Gets the percentage of physical CPU that is configured for YARN containers
+ * This is percent > 0 and <= 100 based on
+ * YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT
+ * @param conf Configuration object
+ * @return percent > 0 and <= 100
+ */
+ public static int getNodeCpuPercentage(Configuration conf) {
int nodeCpuPercentage =
Math.min(conf.getInt(
YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT,
@@ -73,7 +86,6 @@ public class NodeManagerHardwareUtils {
+ ". Value cannot be less than or equal to 0.";
throw new IllegalArgumentException(message);
}
-
- return (nodeCpuPercentage * numProcessors) / 100.0f;
+ return nodeCpuPercentage;
}
}