You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by bo...@apache.org on 2012/07/31 23:09:44 UTC

svn commit: r1367784 - in /hadoop/common/branches/branch-2/hadoop-mapreduce-project: ./ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-se...

Author: bobby
Date: Tue Jul 31 21:09:43 2012
New Revision: 1367784

URL: http://svn.apache.org/viewvc?rev=1367784&view=rev
Log:
svn merge -c 1367783 FIXES: MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is bad (Jason Lowe via bobby)

Modified:
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1367784&r1=1367783&r2=1367784&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Tue Jul 31 21:09:43 2012
@@ -339,6 +339,9 @@ Release 2.0.0-alpha - 05-23-2012
 
     MAPREDUCE-4483. 2.0 build does not work (John George via bobby)
 
+    MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is
+    bad (Jason Lowe via bobby)
+
 Release 0.23.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java?rev=1367784&r1=1367783&r2=1367784&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java Tue Jul 31 21:09:43 2012
@@ -93,23 +93,7 @@ public class LocalDirsHandlerService ext
 
     @Override
     public void run() {
-      boolean newFailure = false;
-      if (localDirs.checkDirs()) {
-        newFailure = true;
-      }
-      if (logDirs.checkDirs()) {
-        newFailure = true;
-      }
-
-      if (newFailure) {
-        LOG.info("Disk(s) failed. " + getDisksHealthReport());
-        updateDirsInConfiguration();
-        if (!areDisksHealthy()) {
-          // Just log.
-          LOG.error("Most of the disks failed. " + getDisksHealthReport());
-        }
-      }
-      lastDisksCheckTime = System.currentTimeMillis();
+      checkDirs();
     }
   }
 
@@ -135,6 +119,10 @@ public class LocalDirsHandlerService ext
         YarnConfiguration.DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION);
     lastDisksCheckTime = System.currentTimeMillis();
     super.init(conf);
+
+    // Check the disk health immediately to weed out bad directories
+    // before other init code attempts to use them.
+    checkDirs();
   }
 
   /**
@@ -144,10 +132,8 @@ public class LocalDirsHandlerService ext
   public void start() {
     if (isDiskHealthCheckerEnabled) {
       dirsHandlerScheduler = new Timer("DiskHealthMonitor-Timer", true);
-      // Start the timer task for disk health checking immediately and
-      // then run periodically at interval time.
-      dirsHandlerScheduler.scheduleAtFixedRate(monitoringTimerTask, 0,
-                                                   diskHealthCheckInterval);
+      dirsHandlerScheduler.scheduleAtFixedRate(monitoringTimerTask,
+          diskHealthCheckInterval, diskHealthCheckInterval);
     }
     super.start();
   }
@@ -253,6 +239,26 @@ public class LocalDirsHandlerService ext
                       logDirs.toArray(new String[logDirs.size()]));
   }
 
+  private void checkDirs() {
+      boolean newFailure = false;
+      if (localDirs.checkDirs()) {
+        newFailure = true;
+      }
+      if (logDirs.checkDirs()) {
+        newFailure = true;
+      }
+
+      if (newFailure) {
+        LOG.info("Disk(s) failed. " + getDisksHealthReport());
+        updateDirsInConfiguration();
+        if (!areDisksHealthy()) {
+          // Just log.
+          LOG.error("Most of the disks failed. " + getDisksHealthReport());
+        }
+      }
+      lastDisksCheckTime = System.currentTimeMillis();
+  }
+
   public Path getLocalPathForWrite(String pathStr) throws IOException {
     return localDirsAllocator.getLocalPathForWrite(pathStr, getConfig());
   }

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java?rev=1367784&r1=1367783&r2=1367784&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java Tue Jul 31 21:09:43 2012
@@ -110,6 +110,35 @@ public class TestDiskFailures {
     testDirsFailures(false);
   }
 
+  /**
+   * Make a local and log directory inaccessible during initialization
+   * and verify those bad directories are recognized and removed from
+   * the list of available local and log directories.
+   * @throws IOException
+   */
+  @Test
+  public void testDirFailuresOnStartup() throws IOException {
+    Configuration conf = new YarnConfiguration();
+    String localDir1 = new File(testDir, "localDir1").getPath();
+    String localDir2 = new File(testDir, "localDir2").getPath();
+    String logDir1 = new File(testDir, "logDir1").getPath();
+    String logDir2 = new File(testDir, "logDir2").getPath();
+    conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir1 + "," + localDir2);
+    conf.set(YarnConfiguration.NM_LOG_DIRS, logDir1 + "," + logDir2);
+
+    prepareDirToFail(localDir1);
+    prepareDirToFail(logDir2);
+
+    LocalDirsHandlerService dirSvc = new LocalDirsHandlerService();
+    dirSvc.init(conf);
+    List<String> localDirs = dirSvc.getLocalDirs();
+    Assert.assertEquals(1, localDirs.size());
+    Assert.assertEquals(localDir2, localDirs.get(0));
+    List<String> logDirs = dirSvc.getLogDirs();
+    Assert.assertEquals(1, logDirs.size());
+    Assert.assertEquals(logDir1, logDirs.get(0));
+  }
+
   private void testDirsFailures(boolean localORLogDirs) throws IOException {
     String dirType = localORLogDirs ? "local" : "log";
     String dirsProperty = localORLogDirs ? YarnConfiguration.NM_LOCAL_DIRS