You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain text copy.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2015/04/22 05:57:39 UTC

hadoop git commit: YARN-3503. Expose disk utilization percentage and bad local and log dir counts in NM metrics. Contributed by Varun Vasudev

Repository: hadoop
Updated Branches:
  refs/heads/trunk bdd90110e -> 674c7ef64


YARN-3503. Expose disk utilization percentage and bad local and log dir counts in NM metrics. Contributed by Varun Vasudev


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/674c7ef6
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/674c7ef6
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/674c7ef6

Branch: refs/heads/trunk
Commit: 674c7ef64916fabbe59c8d6cdd50ca19cf7ddb7c
Parents: bdd9011
Author: Jian He <ji...@apache.org>
Authored: Tue Apr 21 20:55:59 2015 -0700
Committer: Jian He <ji...@apache.org>
Committed: Tue Apr 21 20:57:02 2015 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |  3 ++
 .../server/nodemanager/DirectoryCollection.java | 31 +++++++++++++
 .../nodemanager/LocalDirsHandlerService.java    | 21 +++++++++
 .../yarn/server/nodemanager/NodeManager.java    |  2 +-
 .../nodemanager/metrics/NodeManagerMetrics.java | 48 ++++++++++++++++++++
 .../nodemanager/TestDirectoryCollection.java    | 14 ++++++
 .../TestLocalDirsHandlerService.java            | 31 ++++++++++++-
 7 files changed, 148 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 9150372..3bb6f89 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -154,6 +154,9 @@ Release 2.8.0 - UNRELEASED
     YARN-3494. Expose AM resource limit and usage in CS QueueMetrics. (Rohith
     Sharmaks via jianhe)
 
+    YARN-3503. Expose disk utilization percentage and bad local and log dir 
+    counts in NM metrics. (Varun Vasudev via jianhe)
+
   OPTIMIZATIONS
 
     YARN-3339. TestDockerContainerExecutor should pull a single image and not

http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
index c019aa9..2658918 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
@@ -82,6 +82,8 @@ class DirectoryCollection {
   private float diskUtilizationPercentageCutoff;
   private long diskUtilizationSpaceCutoff;
 
+  private int goodDirsDiskUtilizationPercentage;
+
   /**
    * Create collection for the directories specified. No check for free space.
    * 
@@ -277,6 +279,7 @@ class DirectoryCollection {
             + dirsFailedCheck.get(dir).message);
       }
     }
+    setGoodDirsDiskUtilizationPercentage();
     return setChanged;
   }
 
@@ -390,4 +393,32 @@ class DirectoryCollection {
         diskUtilizationSpaceCutoff < 0 ? 0 : diskUtilizationSpaceCutoff;
     this.diskUtilizationSpaceCutoff = diskUtilizationSpaceCutoff;
   }
+
+  private void setGoodDirsDiskUtilizationPercentage() {
+
+    long totalSpace = 0;
+    long usableSpace = 0;
+
+    for (String dir : localDirs) {
+      File f = new File(dir);
+      if (!f.isDirectory()) {
+        continue;
+      }
+      totalSpace += f.getTotalSpace();
+      usableSpace += f.getUsableSpace();
+    }
+    if (totalSpace != 0) {
+      long tmp = ((totalSpace - usableSpace) * 100) / totalSpace;
+      if (Integer.MIN_VALUE < tmp && Integer.MAX_VALUE > tmp) {
+        goodDirsDiskUtilizationPercentage = (int) tmp;
+      }
+    } else {
+      // got no good dirs
+      goodDirsDiskUtilizationPercentage = 0;
+    }
+  }
+
+  public int getGoodDirsDiskUtilizationPercentage() {
+    return goodDirsDiskUtilizationPercentage;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
index 7d1aa53..493571d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
 
 /**
  * The class which provides functionality of checking the health of the local
@@ -84,6 +85,8 @@ public class LocalDirsHandlerService extends AbstractService {
   
   private static String FILE_SCHEME = "file";
 
+  private NodeManagerMetrics nodeManagerMetrics = null;
+
   /**
    * Class which is used by the {@link Timer} class to periodically execute the
    * disks' health checker code.
@@ -119,7 +122,12 @@ public class LocalDirsHandlerService extends AbstractService {
   }
 
   public LocalDirsHandlerService() {
+    this(null);
+  }
+
+  public LocalDirsHandlerService(NodeManagerMetrics nodeManagerMetrics) {
     super(LocalDirsHandlerService.class.getName());
+    this.nodeManagerMetrics = nodeManagerMetrics;
   }
 
   /**
@@ -389,6 +397,8 @@ public class LocalDirsHandlerService extends AbstractService {
       updateDirsAfterTest();
     }
 
+    updateMetrics();
+
     lastDisksCheckTime = System.currentTimeMillis();
   }
 
@@ -462,4 +472,15 @@ public class LocalDirsHandlerService extends AbstractService {
     validPaths.toArray(arrValidPaths);
     return arrValidPaths;
   }
+
+  protected void updateMetrics() {
+    if (nodeManagerMetrics != null) {
+      nodeManagerMetrics.setBadLocalDirs(localDirs.getFailedDirs().size());
+      nodeManagerMetrics.setBadLogDirs(logDirs.getFailedDirs().size());
+      nodeManagerMetrics.setGoodLocalDirsDiskUtilizationPerc(
+          localDirs.getGoodDirsDiskUtilizationPercentage());
+      nodeManagerMetrics.setGoodLogDirsDiskUtilizationPerc(
+          logDirs.getGoodDirsDiskUtilizationPercentage());
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 0bac8d7..4a28c6f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -258,7 +258,7 @@ public class NodeManager extends CompositeService
     // NodeManager level dispatcher
     this.dispatcher = new AsyncDispatcher();
 
-    dirsHandler = new LocalDirsHandlerService();
+    dirsHandler = new LocalDirsHandlerService(metrics);
     nodeHealthChecker =
         new NodeHealthCheckerService(
             getNodeHealthScriptRunner(conf), dirsHandler);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
index 3615fee..400f14b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
@@ -48,6 +48,15 @@ public class NodeManagerMetrics {
   @Metric MutableGaugeInt availableVCores;
   @Metric("Container launch duration")
       MutableRate containerLaunchDuration;
+  @Metric("# of bad local dirs")
+      MutableGaugeInt badLocalDirs;
+  @Metric("# of bad log dirs")
+      MutableGaugeInt badLogDirs;
+  @Metric("Disk utilization % on good local dirs")
+      MutableGaugeInt goodLocalDirsDiskUtilizationPerc;
+  @Metric("Disk utilization % on good log dirs")
+      MutableGaugeInt goodLogDirsDiskUtilizationPerc;
+
 
   private long allocatedMB;
   private long availableMB;
@@ -125,6 +134,24 @@ public class NodeManagerMetrics {
     containerLaunchDuration.add(value);
   }
 
+  public void setBadLocalDirs(int badLocalDirs) {
+    this.badLocalDirs.set(badLocalDirs);
+  }
+
+  public void setBadLogDirs(int badLogDirs) {
+    this.badLogDirs.set(badLogDirs);
+  }
+
+  public void setGoodLocalDirsDiskUtilizationPerc(
+      int goodLocalDirsDiskUtilizationPerc) {
+    this.goodLocalDirsDiskUtilizationPerc.set(goodLocalDirsDiskUtilizationPerc);
+  }
+
+  public void setGoodLogDirsDiskUtilizationPerc(
+      int goodLogDirsDiskUtilizationPerc) {
+    this.goodLogDirsDiskUtilizationPerc.set(goodLogDirsDiskUtilizationPerc);
+  }
+
   public int getRunningContainers() {
     return containersRunning.value();
   }
@@ -143,4 +170,25 @@ public class NodeManagerMetrics {
   public int getCompletedContainers() {
     return containersCompleted.value();
   }
+
+  @VisibleForTesting
+  public int getBadLogDirs() {
+    return badLogDirs.value();
+  }
+
+  @VisibleForTesting
+  public int getBadLocalDirs() {
+    return badLocalDirs.value();
+  }
+
+  @VisibleForTesting
+  public int getGoodLogDirsDiskUtilizationPerc() {
+    return goodLogDirsDiskUtilizationPerc.value();
+  }
+
+  @VisibleForTesting
+  public int getGoodLocalDirsDiskUtilizationPerc() {
+    return goodLocalDirsDiskUtilizationPerc.value();
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
index e435375..e4525a5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
@@ -129,24 +129,38 @@ public class TestDirectoryCollection {
     Assert.assertEquals(0, dc.getGoodDirs().size());
     Assert.assertEquals(1, dc.getFailedDirs().size());
     Assert.assertEquals(1, dc.getFullDirs().size());
+    // no good dirs
+    Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
 
     dc = new DirectoryCollection(dirs, 100.0F);
+    int utilizedSpacePerc =
+        (int) ((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
+            testDir.getTotalSpace());
     dc.checkDirs();
     Assert.assertEquals(1, dc.getGoodDirs().size());
     Assert.assertEquals(0, dc.getFailedDirs().size());
     Assert.assertEquals(0, dc.getFullDirs().size());
+    Assert.assertEquals(utilizedSpacePerc,
+      dc.getGoodDirsDiskUtilizationPercentage());
 
     dc = new DirectoryCollection(dirs, testDir.getTotalSpace() / (1024 * 1024));
     dc.checkDirs();
     Assert.assertEquals(0, dc.getGoodDirs().size());
     Assert.assertEquals(1, dc.getFailedDirs().size());
     Assert.assertEquals(1, dc.getFullDirs().size());
+    // no good dirs
+    Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
 
     dc = new DirectoryCollection(dirs, 100.0F, 0);
+    utilizedSpacePerc =
+        (int)((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
+            testDir.getTotalSpace());
     dc.checkDirs();
     Assert.assertEquals(1, dc.getGoodDirs().size());
     Assert.assertEquals(0, dc.getFailedDirs().size());
     Assert.assertEquals(0, dc.getFullDirs().size());
+    Assert.assertEquals(utilizedSpacePerc,
+      dc.getGoodDirsDiskUtilizationPercentage());
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/hadoop/blob/674c7ef6/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java
index 84f2fad..a045e62 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.service.Service.STATE;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -106,12 +107,40 @@ public class TestLocalDirsHandlerService {
     conf.set(YarnConfiguration.NM_LOG_DIRS, logDir1 + "," + logDir2);
     conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
       0.0f);
-    LocalDirsHandlerService dirSvc = new LocalDirsHandlerService();
+    NodeManagerMetrics nm = NodeManagerMetrics.create();
+    LocalDirsHandlerService dirSvc = new LocalDirsHandlerService(nm);
     dirSvc.init(conf);
     Assert.assertEquals(0, dirSvc.getLocalDirs().size());
     Assert.assertEquals(0, dirSvc.getLogDirs().size());
     Assert.assertEquals(1, dirSvc.getDiskFullLocalDirs().size());
     Assert.assertEquals(1, dirSvc.getDiskFullLogDirs().size());
+    // check the metrics
+    Assert.assertEquals(2, nm.getBadLocalDirs());
+    Assert.assertEquals(2, nm.getBadLogDirs());
+    Assert.assertEquals(0, nm.getGoodLocalDirsDiskUtilizationPerc());
+    Assert.assertEquals(0, nm.getGoodLogDirsDiskUtilizationPerc());
+
+    conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
+      100.0f);
+    nm = NodeManagerMetrics.create();
+    dirSvc = new LocalDirsHandlerService(nm);
+    dirSvc.init(conf);
+    Assert.assertEquals(1, dirSvc.getLocalDirs().size());
+    Assert.assertEquals(1, dirSvc.getLogDirs().size());
+    Assert.assertEquals(0, dirSvc.getDiskFullLocalDirs().size());
+    Assert.assertEquals(0, dirSvc.getDiskFullLogDirs().size());
+    // check the metrics
+    File dir = new File(localDir1);
+    int utilizationPerc =
+        (int) ((dir.getTotalSpace() - dir.getUsableSpace()) * 100 /
+            dir.getTotalSpace());
+    Assert.assertEquals(1, nm.getBadLocalDirs());
+    Assert.assertEquals(1, nm.getBadLogDirs());
+    Assert.assertEquals(utilizationPerc,
+      nm.getGoodLocalDirsDiskUtilizationPerc());
+    Assert
+      .assertEquals(utilizationPerc, nm.getGoodLogDirsDiskUtilizationPerc());
+
     FileUtils.deleteDirectory(new File(localDir1));
     FileUtils.deleteDirectory(new File(localDir2));
     FileUtils.deleteDirectory(new File(logDir1));