You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain-text copy.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2015/04/22 05:57:39 UTC
hadoop git commit: YARN-3503. Expose disk utilization percentage and
bad local and log dir counts in NM metrics. Contributed by Varun Vasudev
Repository: hadoop
Updated Branches:
refs/heads/trunk bdd90110e -> 674c7ef64
YARN-3503. Expose disk utilization percentage and bad local and log dir counts in NM metrics. Contributed by Varun Vasudev
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/674c7ef6
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/674c7ef6
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/674c7ef6
Branch: refs/heads/trunk
Commit: 674c7ef64916fabbe59c8d6cdd50ca19cf7ddb7c
Parents: bdd9011
Author: Jian He <ji...@apache.org>
Authored: Tue Apr 21 20:55:59 2015 -0700
Committer: Jian He <ji...@apache.org>
Committed: Tue Apr 21 20:57:02 2015 -0700
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 ++
.../server/nodemanager/DirectoryCollection.java | 31 +++++++++++++
.../nodemanager/LocalDirsHandlerService.java | 21 +++++++++
.../yarn/server/nodemanager/NodeManager.java | 2 +-
.../nodemanager/metrics/NodeManagerMetrics.java | 48 ++++++++++++++++++++
.../nodemanager/TestDirectoryCollection.java | 14 ++++++
.../TestLocalDirsHandlerService.java | 31 ++++++++++++-
7 files changed, 148 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 9150372..3bb6f89 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -154,6 +154,9 @@ Release 2.8.0 - UNRELEASED
YARN-3494. Expose AM resource limit and usage in CS QueueMetrics. (Rohith
Sharmaks via jianhe)
+ YARN-3503. Expose disk utilization percentage and bad local and log dir
+ counts in NM metrics. (Varun Vasudev via jianhe)
+
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not
http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
index c019aa9..2658918 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
@@ -82,6 +82,8 @@ class DirectoryCollection {
private float diskUtilizationPercentageCutoff;
private long diskUtilizationSpaceCutoff;
+ private int goodDirsDiskUtilizationPercentage;
+
/**
* Create collection for the directories specified. No check for free space.
*
@@ -277,6 +279,7 @@ class DirectoryCollection {
+ dirsFailedCheck.get(dir).message);
}
}
+ setGoodDirsDiskUtilizationPercentage();
return setChanged;
}
@@ -390,4 +393,32 @@ class DirectoryCollection {
diskUtilizationSpaceCutoff < 0 ? 0 : diskUtilizationSpaceCutoff;
this.diskUtilizationSpaceCutoff = diskUtilizationSpaceCutoff;
}
+
+ private void setGoodDirsDiskUtilizationPercentage() {
+
+ long totalSpace = 0;
+ long usableSpace = 0;
+
+ for (String dir : localDirs) {
+ File f = new File(dir);
+ if (!f.isDirectory()) {
+ continue;
+ }
+ totalSpace += f.getTotalSpace();
+ usableSpace += f.getUsableSpace();
+ }
+ if (totalSpace != 0) {
+ long tmp = ((totalSpace - usableSpace) * 100) / totalSpace;
+ if (Integer.MIN_VALUE < tmp && Integer.MAX_VALUE > tmp) {
+ goodDirsDiskUtilizationPercentage = (int) tmp;
+ }
+ } else {
+ // got no good dirs
+ goodDirsDiskUtilizationPercentage = 0;
+ }
+ }
+
+ public int getGoodDirsDiskUtilizationPercentage() {
+ return goodDirsDiskUtilizationPercentage;
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
index 7d1aa53..493571d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
/**
* The class which provides functionality of checking the health of the local
@@ -84,6 +85,8 @@ public class LocalDirsHandlerService extends AbstractService {
private static String FILE_SCHEME = "file";
+ private NodeManagerMetrics nodeManagerMetrics = null;
+
/**
* Class which is used by the {@link Timer} class to periodically execute the
* disks' health checker code.
@@ -119,7 +122,12 @@ public class LocalDirsHandlerService extends AbstractService {
}
public LocalDirsHandlerService() {
+ this(null);
+ }
+
+ public LocalDirsHandlerService(NodeManagerMetrics nodeManagerMetrics) {
super(LocalDirsHandlerService.class.getName());
+ this.nodeManagerMetrics = nodeManagerMetrics;
}
/**
@@ -389,6 +397,8 @@ public class LocalDirsHandlerService extends AbstractService {
updateDirsAfterTest();
}
+ updateMetrics();
+
lastDisksCheckTime = System.currentTimeMillis();
}
@@ -462,4 +472,15 @@ public class LocalDirsHandlerService extends AbstractService {
validPaths.toArray(arrValidPaths);
return arrValidPaths;
}
+
+ protected void updateMetrics() {
+ if (nodeManagerMetrics != null) {
+ nodeManagerMetrics.setBadLocalDirs(localDirs.getFailedDirs().size());
+ nodeManagerMetrics.setBadLogDirs(logDirs.getFailedDirs().size());
+ nodeManagerMetrics.setGoodLocalDirsDiskUtilizationPerc(
+ localDirs.getGoodDirsDiskUtilizationPercentage());
+ nodeManagerMetrics.setGoodLogDirsDiskUtilizationPerc(
+ logDirs.getGoodDirsDiskUtilizationPercentage());
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 0bac8d7..4a28c6f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -258,7 +258,7 @@ public class NodeManager extends CompositeService
// NodeManager level dispatcher
this.dispatcher = new AsyncDispatcher();
- dirsHandler = new LocalDirsHandlerService();
+ dirsHandler = new LocalDirsHandlerService(metrics);
nodeHealthChecker =
new NodeHealthCheckerService(
getNodeHealthScriptRunner(conf), dirsHandler);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
index 3615fee..400f14b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
@@ -48,6 +48,15 @@ public class NodeManagerMetrics {
@Metric MutableGaugeInt availableVCores;
@Metric("Container launch duration")
MutableRate containerLaunchDuration;
+ @Metric("# of bad local dirs")
+ MutableGaugeInt badLocalDirs;
+ @Metric("# of bad log dirs")
+ MutableGaugeInt badLogDirs;
+ @Metric("Disk utilization % on good local dirs")
+ MutableGaugeInt goodLocalDirsDiskUtilizationPerc;
+ @Metric("Disk utilization % on good log dirs")
+ MutableGaugeInt goodLogDirsDiskUtilizationPerc;
+
private long allocatedMB;
private long availableMB;
@@ -125,6 +134,24 @@ public class NodeManagerMetrics {
containerLaunchDuration.add(value);
}
+ public void setBadLocalDirs(int badLocalDirs) {
+ this.badLocalDirs.set(badLocalDirs);
+ }
+
+ public void setBadLogDirs(int badLogDirs) {
+ this.badLogDirs.set(badLogDirs);
+ }
+
+ public void setGoodLocalDirsDiskUtilizationPerc(
+ int goodLocalDirsDiskUtilizationPerc) {
+ this.goodLocalDirsDiskUtilizationPerc.set(goodLocalDirsDiskUtilizationPerc);
+ }
+
+ public void setGoodLogDirsDiskUtilizationPerc(
+ int goodLogDirsDiskUtilizationPerc) {
+ this.goodLogDirsDiskUtilizationPerc.set(goodLogDirsDiskUtilizationPerc);
+ }
+
public int getRunningContainers() {
return containersRunning.value();
}
@@ -143,4 +170,25 @@ public class NodeManagerMetrics {
public int getCompletedContainers() {
return containersCompleted.value();
}
+
+ @VisibleForTesting
+ public int getBadLogDirs() {
+ return badLogDirs.value();
+ }
+
+ @VisibleForTesting
+ public int getBadLocalDirs() {
+ return badLocalDirs.value();
+ }
+
+ @VisibleForTesting
+ public int getGoodLogDirsDiskUtilizationPerc() {
+ return goodLogDirsDiskUtilizationPerc.value();
+ }
+
+ @VisibleForTesting
+ public int getGoodLocalDirsDiskUtilizationPerc() {
+ return goodLocalDirsDiskUtilizationPerc.value();
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
index e435375..e4525a5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
@@ -129,24 +129,38 @@ public class TestDirectoryCollection {
Assert.assertEquals(0, dc.getGoodDirs().size());
Assert.assertEquals(1, dc.getFailedDirs().size());
Assert.assertEquals(1, dc.getFullDirs().size());
+ // no good dirs
+ Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
dc = new DirectoryCollection(dirs, 100.0F);
+ int utilizedSpacePerc =
+ (int) ((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
+ testDir.getTotalSpace());
dc.checkDirs();
Assert.assertEquals(1, dc.getGoodDirs().size());
Assert.assertEquals(0, dc.getFailedDirs().size());
Assert.assertEquals(0, dc.getFullDirs().size());
+ Assert.assertEquals(utilizedSpacePerc,
+ dc.getGoodDirsDiskUtilizationPercentage());
dc = new DirectoryCollection(dirs, testDir.getTotalSpace() / (1024 * 1024));
dc.checkDirs();
Assert.assertEquals(0, dc.getGoodDirs().size());
Assert.assertEquals(1, dc.getFailedDirs().size());
Assert.assertEquals(1, dc.getFullDirs().size());
+ // no good dirs
+ Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
dc = new DirectoryCollection(dirs, 100.0F, 0);
+ utilizedSpacePerc =
+ (int)((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
+ testDir.getTotalSpace());
dc.checkDirs();
Assert.assertEquals(1, dc.getGoodDirs().size());
Assert.assertEquals(0, dc.getFailedDirs().size());
Assert.assertEquals(0, dc.getFullDirs().size());
+ Assert.assertEquals(utilizedSpacePerc,
+ dc.getGoodDirsDiskUtilizationPercentage());
}
@Test
http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java
index 84f2fad..a045e62 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.service.Service.STATE;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -106,12 +107,40 @@ public class TestLocalDirsHandlerService {
conf.set(YarnConfiguration.NM_LOG_DIRS, logDir1 + "," + logDir2);
conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
0.0f);
- LocalDirsHandlerService dirSvc = new LocalDirsHandlerService();
+ NodeManagerMetrics nm = NodeManagerMetrics.create();
+ LocalDirsHandlerService dirSvc = new LocalDirsHandlerService(nm);
dirSvc.init(conf);
Assert.assertEquals(0, dirSvc.getLocalDirs().size());
Assert.assertEquals(0, dirSvc.getLogDirs().size());
Assert.assertEquals(1, dirSvc.getDiskFullLocalDirs().size());
Assert.assertEquals(1, dirSvc.getDiskFullLogDirs().size());
+ // check the metrics
+ Assert.assertEquals(2, nm.getBadLocalDirs());
+ Assert.assertEquals(2, nm.getBadLogDirs());
+ Assert.assertEquals(0, nm.getGoodLocalDirsDiskUtilizationPerc());
+ Assert.assertEquals(0, nm.getGoodLogDirsDiskUtilizationPerc());
+
+ conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
+ 100.0f);
+ nm = NodeManagerMetrics.create();
+ dirSvc = new LocalDirsHandlerService(nm);
+ dirSvc.init(conf);
+ Assert.assertEquals(1, dirSvc.getLocalDirs().size());
+ Assert.assertEquals(1, dirSvc.getLogDirs().size());
+ Assert.assertEquals(0, dirSvc.getDiskFullLocalDirs().size());
+ Assert.assertEquals(0, dirSvc.getDiskFullLogDirs().size());
+ // check the metrics
+ File dir = new File(localDir1);
+ int utilizationPerc =
+ (int) ((dir.getTotalSpace() - dir.getUsableSpace()) * 100 /
+ dir.getTotalSpace());
+ Assert.assertEquals(1, nm.getBadLocalDirs());
+ Assert.assertEquals(1, nm.getBadLogDirs());
+ Assert.assertEquals(utilizationPerc,
+ nm.getGoodLocalDirsDiskUtilizationPerc());
+ Assert
+ .assertEquals(utilizationPerc, nm.getGoodLogDirsDiskUtilizationPerc());
+
FileUtils.deleteDirectory(new File(localDir1));
FileUtils.deleteDirectory(new File(localDir2));
FileUtils.deleteDirectory(new File(logDir1));