You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aw...@apache.org on 2015/02/24 20:25:50 UTC

hadoop git commit: YARN-2980. Move health check script related functionality to hadoop-common (Varun Saxena via aw)

Repository: hadoop
Updated Branches:
  refs/heads/trunk 73bcfa99a -> d4ac6822e


YARN-2980. Move health check script related functionality to hadoop-common (Varun Saxena via aw)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d4ac6822
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d4ac6822
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d4ac6822

Branch: refs/heads/trunk
Commit: d4ac6822e1c5dfac504ced48f10ab57a55b49e93
Parents: 73bcfa9
Author: Allen Wittenauer <aw...@apache.org>
Authored: Tue Feb 24 11:25:26 2015 -0800
Committer: Allen Wittenauer <aw...@apache.org>
Committed: Tue Feb 24 11:25:26 2015 -0800

----------------------------------------------------------------------
 .../hadoop/util/NodeHealthScriptRunner.java     | 345 ++++++++++++++++++
 .../hadoop/util/TestNodeHealthScriptRunner.java | 136 +++++++
 hadoop-yarn-project/CHANGES.txt                 |   3 +
 .../nodemanager/NodeHealthCheckerService.java   |  12 +-
 .../nodemanager/NodeHealthScriptRunner.java     | 356 -------------------
 .../yarn/server/nodemanager/NodeManager.java    |  26 +-
 .../yarn/server/nodemanager/TestEventFlow.java  |   7 +-
 .../nodemanager/TestNodeHealthService.java      |  86 ++---
 .../BaseContainerManagerTest.java               |   7 +-
 .../webapp/TestContainerLogsPage.java           |  13 +-
 .../nodemanager/webapp/TestNMWebServer.java     |  13 +-
 .../nodemanager/webapp/TestNMWebServices.java   |   7 +-
 .../webapp/TestNMWebServicesApps.java           |   6 +-
 .../webapp/TestNMWebServicesContainers.java     |   5 +-
 14 files changed, 587 insertions(+), 435 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java
new file mode 100644
index 0000000..568ad5b
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java
@@ -0,0 +1,345 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Timer;
+import java.util.TimerTask;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.util.Shell.ExitCodeException;
+import org.apache.hadoop.util.Shell.ShellCommandExecutor;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * 
+ * The class which provides functionality of checking the health of the node
+ * using the configured node health script and reporting back to the service
+ * for which the health checker has been asked to report.
+ */
+public class NodeHealthScriptRunner extends AbstractService {
+
+  private static Log LOG = LogFactory.getLog(NodeHealthScriptRunner.class);
+
+  /** Absolute path to the health script. */
+  private String nodeHealthScript;
+  /** Delay after which node health script to be executed */
+  private long intervalTime;
+  /** Time after which the script should be timedout */
+  private long scriptTimeout;
+  /** Timer used to schedule node health monitoring script execution */
+  private Timer nodeHealthScriptScheduler;
+
+  /** ShellCommandExecutor used to execute monitoring script */
+  ShellCommandExecutor shexec = null;
+
+  /** Pattern used for searching in the output of the node health script */
+  static private final String ERROR_PATTERN = "ERROR";
+
+  /** Time out error message */
+  public static final String NODE_HEALTH_SCRIPT_TIMED_OUT_MSG = "Node health script timed out";
+
+  private boolean isHealthy;
+
+  private String healthReport;
+
+  private long lastReportedTime;
+
+  private TimerTask timer;
+  
+  private enum HealthCheckerExitStatus {
+    SUCCESS,
+    TIMED_OUT,
+    FAILED_WITH_EXIT_CODE,
+    FAILED_WITH_EXCEPTION,
+    FAILED
+  }
+
+
+  /**
+   * Class which is used by the {@link Timer} class to periodically execute the
+   * node health script.
+   * 
+   */
+  private class NodeHealthMonitorExecutor extends TimerTask {
+
+    String exceptionStackTrace = "";
+
+    public NodeHealthMonitorExecutor(String[] args) {
+      ArrayList<String> execScript = new ArrayList<String>();
+      execScript.add(nodeHealthScript);
+      if (args != null) {
+        execScript.addAll(Arrays.asList(args));
+      }
+      shexec = new ShellCommandExecutor(execScript
+          .toArray(new String[execScript.size()]), null, null, scriptTimeout);
+    }
+
+    @Override
+    public void run() {
+      HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS;
+      try {
+        shexec.execute();
+      } catch (ExitCodeException e) {
+        // ignore the exit code of the script
+        status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE;
+        // On Windows, we will not hit the Stream closed IOException
+        // thrown by stdout buffered reader for timeout event.
+        if (Shell.WINDOWS && shexec.isTimedOut()) {
+          status = HealthCheckerExitStatus.TIMED_OUT;
+        }
+      } catch (Exception e) {
+        LOG.warn("Caught exception : " + e.getMessage());
+        if (!shexec.isTimedOut()) {
+          status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
+        } else {
+          status = HealthCheckerExitStatus.TIMED_OUT;
+        }
+        exceptionStackTrace = StringUtils.stringifyException(e);
+      } finally {
+        if (status == HealthCheckerExitStatus.SUCCESS) {
+          if (hasErrors(shexec.getOutput())) {
+            status = HealthCheckerExitStatus.FAILED;
+          }
+        }
+        reportHealthStatus(status);
+      }
+    }
+
+    /**
+     * Translates the outcome of one script execution into the health state
+     * kept by the enclosing runner.
+     * 
+     * The node is marked unhealthy if
+     * <ol>
+     * <li>The node health script times out</li>
+     * <li>The node health scripts output has a line which begins with ERROR</li>
+     * <li>An exception other than {@link ExitCodeException} is thrown while
+     * executing the script; its stack trace becomes the report</li>
+     * </ol>
+     * If the script fails with an {@link ExitCodeException} without timing
+     * out, the output is ignored and the node is reported healthy, as the
+     * script might have a syntax error. The last reported time is refreshed
+     * only for the two healthy outcomes.
+     * 
+     * @param status
+     *          outcome of the most recent script execution
+     */
+    void reportHealthStatus(HealthCheckerExitStatus status) {
+      long now = System.currentTimeMillis();
+      switch (status) {
+      case SUCCESS:
+        setHealthStatus(true, "", now);
+        break;
+      case TIMED_OUT:
+        setHealthStatus(false, NODE_HEALTH_SCRIPT_TIMED_OUT_MSG);
+        break;
+      case FAILED_WITH_EXCEPTION:
+        setHealthStatus(false, exceptionStackTrace);
+        break;
+      case FAILED_WITH_EXIT_CODE:
+        setHealthStatus(true, "", now);
+        break;
+      case FAILED:
+        setHealthStatus(false, shexec.getOutput());
+        break;
+      }
+    }
+
+    /**
+     * Method to check if the output string has line which begins with ERROR.
+     * 
+     * @param output
+     *          string
+     * @return true if output string has error pattern in it.
+     */
+    private boolean hasErrors(String output) {
+      String[] splits = output.split("\n");
+      for (String split : splits) {
+        if (split.startsWith(ERROR_PATTERN)) {
+          return true;
+        }
+      }
+      return false;
+    }
+  }
+
+  public NodeHealthScriptRunner(String scriptName, long chkInterval, long timeout,
+      String[] scriptArgs) {
+    super(NodeHealthScriptRunner.class.getName());
+    this.lastReportedTime = System.currentTimeMillis();
+    this.isHealthy = true;
+    this.healthReport = "";
+    this.nodeHealthScript = scriptName;
+    this.intervalTime = chkInterval;
+    this.scriptTimeout = timeout;
+    this.timer = new NodeHealthMonitorExecutor(scriptArgs);
+  }
+
+  /*
+   * Only delegates to the parent: the settings come from the constructor.
+   */
+  @Override
+  protected void serviceInit(Configuration conf) throws Exception {
+    super.serviceInit(conf);
+  }
+
+  /**
+   * Method used to start the Node health monitoring.
+   * 
+   */
+  @Override
+  protected void serviceStart() throws Exception {
+    // if health script path is not configured don't start the thread.
+    if (!shouldRun(nodeHealthScript)) {
+      LOG.info("Not starting node health monitor");
+      return;
+    }
+    nodeHealthScriptScheduler = new Timer("NodeHealthMonitor-Timer", true);
+    // Start the timer task immediately and
+    // then periodically at interval time.
+    nodeHealthScriptScheduler.scheduleAtFixedRate(timer, 0, intervalTime);
+    super.serviceStart();
+  }
+
+  /**
+   * Method used to terminate the node health monitoring service.
+   * 
+   */
+  @Override
+  protected void serviceStop() {
+    if (!shouldRun(nodeHealthScript)) {
+      return;
+    }
+    if (nodeHealthScriptScheduler != null) {
+      nodeHealthScriptScheduler.cancel();
+    }
+    if (shexec != null) {
+      Process p = shexec.getProcess();
+      if (p != null) {
+        p.destroy();
+      }
+    }
+  }
+
+  /**
+   * Tells whether the node is currently considered healthy. Synchronized so
+   * the value written by the timer thread is visible to the caller.
+   * @return true if node is healthy
+   */
+  public synchronized boolean isHealthy() {
+    return isHealthy;
+  }
+
+  /**
+   * Records whether the last script run left the node healthy. Only the
+   * script result is stored here; no disk state is consulted.
+   * @param isHealthy
+   *          true if the node is healthy
+   */
+  private synchronized void setHealthy(boolean isHealthy) {
+    this.isHealthy = isHealthy;
+  }
+
+  /**
+   * Returns the latest health report: empty while the node is healthy,
+   * otherwise the script output, the time-out message or a stack trace.
+   * Synchronized to pair with the synchronized setter.
+   * @return report text for the most recent script run
+   */
+  public synchronized String getHealthReport() {
+    return healthReport;
+  }
+
+  /**
+   * Stores the health report produced for the latest script run. Nothing
+   * else (such as disk health information) is merged in here.
+   *
+   * @param healthReport report text to expose through getHealthReport()
+   */
+  private synchronized void setHealthReport(String healthReport) {
+    this.healthReport = healthReport;
+  }
+  
+  /**
+   * Returns the time of the last healthy report (initially construction
+   * time). Synchronized so the 64-bit read pairs with the locked write.
+   * @return timestamp in milliseconds since the epoch
+   */
+  public synchronized long getLastReportedTime() {
+    return lastReportedTime;
+  }
+
+  /**
+   * Sets the last run time of the node health script.
+   * 
+   * @param lastReportedTime
+   */
+  private synchronized void setLastReportedTime(long lastReportedTime) {
+    this.lastReportedTime = lastReportedTime;
+  }
+
+  /**
+   * Method used to determine if or not node health monitoring service should be
+   * started or not. Returns true if following conditions are met:
+   * 
+   * <ol>
+   * <li>Path to Node health check script is not empty</li>
+   * <li>Node health check script file exists</li>
+   * </ol>
+   * 
+   * @return true if node health monitoring service can be started.
+   */
+  public static boolean shouldRun(String healthScript) {
+    if (healthScript == null || healthScript.trim().isEmpty()) {
+      return false;
+    }
+    File f = new File(healthScript);
+    return f.exists() && FileUtil.canExecute(f);
+  }
+
+  /** Stores flag and report; the last reported time is left untouched. */
+  private synchronized void setHealthStatus(boolean isHealthy, String output) {
+    this.setHealthy(isHealthy);
+    this.setHealthReport(output);
+  }
+
+  /** Same update, and additionally stamps time as the last reported time. */
+  private synchronized void setHealthStatus(boolean isHealthy, String output,
+      long time) {
+    this.setHealthStatus(isHealthy, output);
+    this.setLastReportedTime(time);
+  }
+
+  /**
+   * Used only by tests to access the timer task directly
+   * @return the timer task
+   */
+  public TimerTask getTimerTask() {
+    return timer;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java
new file mode 100644
index 0000000..8fc64d1
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java
@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.TimerTask;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestNodeHealthScriptRunner {
+
+  protected static File testRootDir = new File("target",
+      TestNodeHealthScriptRunner.class.getName() +
+      "-localDir").getAbsoluteFile();
+
+  private File nodeHealthscriptFile = new File(testRootDir,
+      Shell.appendScriptExtension("failingscript"));
+
+  @Before
+  public void setup() {
+    testRootDir.mkdirs();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (testRootDir.exists()) {
+      FileContext.getLocalFSFileContext().delete(
+          new Path(testRootDir.getAbsolutePath()), true);
+    }
+  }
+
+  /** Overwrites the script file with scriptStr and updates its exec bit. */
+  private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable)
+          throws IOException {
+    FileUtil.setWritable(nodeHealthscriptFile, true);
+    FileUtil.setReadable(nodeHealthscriptFile, true);
+    PrintWriter pw =
+        new PrintWriter(new FileOutputStream(nodeHealthscriptFile));
+    try { // opened above, so a failed open cannot cause an NPE in finally
+      pw.println(scriptStr);
+    } finally {
+      pw.close(); // close() also flushes the buffered output
+    }
+    FileUtil.setExecutable(nodeHealthscriptFile, setExecutable);
+  }
+
+  /** shouldRun() must be true only for an existing, executable script. */
+  @Test
+  public void testNodeHealthScriptShouldRun() throws IOException {
+    // The script file has not been written yet.
+    Assert.assertFalse("Node health script should not start",
+        NodeHealthScriptRunner.shouldRun(
+            nodeHealthscriptFile.getAbsolutePath()));
+    writeNodeHealthScriptFile("", false);
+    Assert.assertFalse("Non-executable health script should not start",
+        NodeHealthScriptRunner.shouldRun(
+            nodeHealthscriptFile.getAbsolutePath()));
+    writeNodeHealthScriptFile("", true);
+    Assert.assertTrue("Node health script should start",
+        NodeHealthScriptRunner.shouldRun(
+            nodeHealthscriptFile.getAbsolutePath()));
+  }
+
+  @Test
+  public void testNodeHealthScript() throws Exception {
+    String errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
+    String normalScript = "echo \"I am all fine\"";
+    String timeOutScript =
+      Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\""
+      : "sleep 4\necho \"I am fine\"";
+    Configuration conf = new Configuration();
+    writeNodeHealthScriptFile(normalScript, true);
+    NodeHealthScriptRunner nodeHealthScriptRunner = new NodeHealthScriptRunner(
+            nodeHealthscriptFile.getAbsolutePath(),
+            500, 1000, new String[] {});
+    nodeHealthScriptRunner.init(conf);
+    TimerTask timerTask = nodeHealthScriptRunner.getTimerTask();
+
+    timerTask.run();
+    // Normal Script runs successfully
+    Assert.assertTrue("Node health status reported unhealthy",
+        nodeHealthScriptRunner.isHealthy());
+    Assert.assertEquals("", nodeHealthScriptRunner.getHealthReport());
+
+    // Error script.
+    writeNodeHealthScriptFile(errorScript, true);
+    // Run timer
+    timerTask.run();
+    Assert.assertFalse("Node health status reported healthy",
+        nodeHealthScriptRunner.isHealthy());
+    Assert.assertTrue(
+        nodeHealthScriptRunner.getHealthReport().contains("ERROR"));
+    
+    // Healthy script.
+    writeNodeHealthScriptFile(normalScript, true);
+    timerTask.run();
+    Assert.assertTrue("Node health status reported unhealthy",
+        nodeHealthScriptRunner.isHealthy());
+    Assert.assertEquals("", nodeHealthScriptRunner.getHealthReport());
+
+    // Timeout script.
+    writeNodeHealthScriptFile(timeOutScript, true);
+    timerTask.run();
+    Assert.assertFalse("Node health status reported healthy even after timeout",
+    nodeHealthScriptRunner.isHealthy());
+    Assert.assertEquals(
+            NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG,
+            nodeHealthScriptRunner.getHealthReport());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 36bc707..39e2dc0 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -17,6 +17,9 @@ Trunk - Unreleased
 
     YARN-2796. deprecate sbin/yarn-daemon.sh (aw)
 
+    YARN-2980. Move health check script related functionality to hadoop-common
+    (Varun Saxena via aw)
+
   OPTIMIZATIONS
 
   BUG FIXES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java
index 6d6001a..02b318a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.yarn.server.nodemanager;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.CompositeService;
+import org.apache.hadoop.util.NodeHealthScriptRunner;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 
 /**
  * The class which provides functionality of checking the health of the node and
@@ -33,15 +35,17 @@ public class NodeHealthCheckerService extends CompositeService {
 
   static final String SEPARATOR = ";";
 
-  public NodeHealthCheckerService() {
+  public NodeHealthCheckerService(NodeHealthScriptRunner scriptRunner,
+      LocalDirsHandlerService dirHandlerService) {
     super(NodeHealthCheckerService.class.getName());
-    dirsHandler = new LocalDirsHandlerService();
+    nodeHealthScriptRunner = scriptRunner;
+    dirsHandler = dirHandlerService;
   }
 
   @Override
   protected void serviceInit(Configuration conf) throws Exception {
-    if (NodeHealthScriptRunner.shouldRun(conf)) {
-      nodeHealthScriptRunner = new NodeHealthScriptRunner();
+    if (NodeHealthScriptRunner.shouldRun(
+        conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH))) {
       addService(nodeHealthScriptRunner);
     }
     addService(dirsHandler);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java
deleted file mode 100644
index e3c9862..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java
+++ /dev/null
@@ -1,356 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.server.nodemanager;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Timer;
-import java.util.TimerTask;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.service.AbstractService;
-import org.apache.hadoop.util.Shell.ExitCodeException;
-import org.apache.hadoop.util.Shell.ShellCommandExecutor;
-import org.apache.hadoop.util.Shell;
-import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-
-/**
- * 
- * The class which provides functionality of checking the health of the node
- * using the configured node health script and reporting back to the service
- * for which the health checker has been asked to report.
- */
-public class NodeHealthScriptRunner extends AbstractService {
-
-  private static Log LOG = LogFactory.getLog(NodeHealthScriptRunner.class);
-
-  /** Absolute path to the health script. */
-  private String nodeHealthScript;
-  /** Delay after which node health script to be executed */
-  private long intervalTime;
-  /** Time after which the script should be timedout */
-  private long scriptTimeout;
-  /** Timer used to schedule node health monitoring script execution */
-  private Timer nodeHealthScriptScheduler;
-
-  /** ShellCommandExecutor used to execute monitoring script */
-  ShellCommandExecutor shexec = null;
-
-  /** Configuration used by the checker */
-  private Configuration conf;
-
-  /** Pattern used for searching in the output of the node health script */
-  static private final String ERROR_PATTERN = "ERROR";
-
-  /** Time out error message */
-  static final String NODE_HEALTH_SCRIPT_TIMED_OUT_MSG = "Node health script timed out";
-
-  private boolean isHealthy;
-
-  private String healthReport;
-
-  private long lastReportedTime;
-
-  private TimerTask timer;
-  
-  private enum HealthCheckerExitStatus {
-    SUCCESS,
-    TIMED_OUT,
-    FAILED_WITH_EXIT_CODE,
-    FAILED_WITH_EXCEPTION,
-    FAILED
-  }
-
-
-  /**
-   * Class which is used by the {@link Timer} class to periodically execute the
-   * node health script.
-   * 
-   */
-  private class NodeHealthMonitorExecutor extends TimerTask {
-
-    String exceptionStackTrace = "";
-
-    public NodeHealthMonitorExecutor(String[] args) {
-      ArrayList<String> execScript = new ArrayList<String>();
-      execScript.add(nodeHealthScript);
-      if (args != null) {
-        execScript.addAll(Arrays.asList(args));
-      }
-      shexec = new ShellCommandExecutor(execScript
-          .toArray(new String[execScript.size()]), null, null, scriptTimeout);
-    }
-
-    @Override
-    public void run() {
-      HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS;
-      try {
-        shexec.execute();
-      } catch (ExitCodeException e) {
-        // ignore the exit code of the script
-        status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE;
-        // On Windows, we will not hit the Stream closed IOException
-        // thrown by stdout buffered reader for timeout event.
-        if (Shell.WINDOWS && shexec.isTimedOut()) {
-          status = HealthCheckerExitStatus.TIMED_OUT;
-        }
-      } catch (Exception e) {
-        LOG.warn("Caught exception : " + e.getMessage());
-        if (!shexec.isTimedOut()) {
-          status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
-        } else {
-          status = HealthCheckerExitStatus.TIMED_OUT;
-        }
-        exceptionStackTrace = StringUtils.stringifyException(e);
-      } finally {
-        if (status == HealthCheckerExitStatus.SUCCESS) {
-          if (hasErrors(shexec.getOutput())) {
-            status = HealthCheckerExitStatus.FAILED;
-          }
-        }
-        reportHealthStatus(status);
-      }
-    }
-
-    /**
-     * Method which is used to parse output from the node health monitor and
-     * send to the report address.
-     * 
-     * The timed out script or script which causes IOException output is
-     * ignored.
-     * 
-     * The node is marked unhealthy if
-     * <ol>
-     * <li>The node health script times out</li>
-     * <li>The node health scripts output has a line which begins with ERROR</li>
-     * <li>An exception is thrown while executing the script</li>
-     * </ol>
-     * If the script throws {@link IOException} or {@link ExitCodeException} the
-     * output is ignored and node is left remaining healthy, as script might
-     * have syntax error.
-     * 
-     * @param status
-     */
-    void reportHealthStatus(HealthCheckerExitStatus status) {
-      long now = System.currentTimeMillis();
-      switch (status) {
-      case SUCCESS:
-        setHealthStatus(true, "", now);
-        break;
-      case TIMED_OUT:
-        setHealthStatus(false, NODE_HEALTH_SCRIPT_TIMED_OUT_MSG);
-        break;
-      case FAILED_WITH_EXCEPTION:
-        setHealthStatus(false, exceptionStackTrace);
-        break;
-      case FAILED_WITH_EXIT_CODE:
-        setHealthStatus(true, "", now);
-        break;
-      case FAILED:
-        setHealthStatus(false, shexec.getOutput());
-        break;
-      }
-    }
-
-    /**
-     * Method to check if the output string has line which begins with ERROR.
-     * 
-     * @param output
-     *          string
-     * @return true if output string has error pattern in it.
-     */
-    private boolean hasErrors(String output) {
-      String[] splits = output.split("\n");
-      for (String split : splits) {
-        if (split.startsWith(ERROR_PATTERN)) {
-          return true;
-        }
-      }
-      return false;
-    }
-  }
-
-  public NodeHealthScriptRunner() {
-    super(NodeHealthScriptRunner.class.getName());
-    this.lastReportedTime = System.currentTimeMillis();
-    this.isHealthy = true;
-    this.healthReport = "";    
-  }
-
-  /*
-   * Method which initializes the values for the script path and interval time.
-   */
-  @Override
-  protected void serviceInit(Configuration conf) throws Exception {
-    this.conf = conf;
-    this.nodeHealthScript = 
-        conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH);
-    this.intervalTime = conf.getLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS,
-        YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS);
-    this.scriptTimeout = conf.getLong(
-        YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS,
-        YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS);
-    String[] args = conf.getStrings(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS,
-        new String[] {});
-    timer = new NodeHealthMonitorExecutor(args);
-    super.serviceInit(conf);
-  }
-
-  /**
-   * Method used to start the Node health monitoring.
-   * 
-   */
-  @Override
-  protected void serviceStart() throws Exception {
-    // if health script path is not configured don't start the thread.
-    if (!shouldRun(conf)) {
-      LOG.info("Not starting node health monitor");
-      return;
-    }
-    nodeHealthScriptScheduler = new Timer("NodeHealthMonitor-Timer", true);
-    // Start the timer task immediately and
-    // then periodically at interval time.
-    nodeHealthScriptScheduler.scheduleAtFixedRate(timer, 0, intervalTime);
-    super.serviceStart();
-  }
-
-  /**
-   * Method used to terminate the node health monitoring service.
-   * 
-   */
-  @Override
-  protected void serviceStop() {
-    if (!shouldRun(conf)) {
-      return;
-    }
-    if (nodeHealthScriptScheduler != null) {
-      nodeHealthScriptScheduler.cancel();
-    }
-    if (shexec != null) {
-      Process p = shexec.getProcess();
-      if (p != null) {
-        p.destroy();
-      }
-    }
-  }
-
-  /**
-   * Gets the if the node is healthy or not
-   * 
-   * @return true if node is healthy
-   */
-  public boolean isHealthy() {
-    return isHealthy;
-  }
-
-  /**
-   * Sets if the node is healhty or not considering disks' health also.
-   * 
-   * @param isHealthy
-   *          if or not node is healthy
-   */
-  private synchronized void setHealthy(boolean isHealthy) {
-    this.isHealthy = isHealthy;
-  }
-
-  /**
-   * Returns output from health script. if node is healthy then an empty string
-   * is returned.
-   * 
-   * @return output from health script
-   */
-  public String getHealthReport() {
-    return healthReport;
-  }
-
-  /**
-   * Sets the health report from the node health script. Also set the disks'
-   * health info obtained from DiskHealthCheckerService.
-   *
-   * @param healthReport
-   */
-  private synchronized void setHealthReport(String healthReport) {
-    this.healthReport = healthReport;
-  }
-  
-  /**
-   * Returns time stamp when node health script was last run.
-   * 
-   * @return timestamp when node health script was last run
-   */
-  public long getLastReportedTime() {
-    return lastReportedTime;
-  }
-
-  /**
-   * Sets the last run time of the node health script.
-   * 
-   * @param lastReportedTime
-   */
-  private synchronized void setLastReportedTime(long lastReportedTime) {
-    this.lastReportedTime = lastReportedTime;
-  }
-
-  /**
-   * Method used to determine if or not node health monitoring service should be
-   * started or not. Returns true if following conditions are met:
-   * 
-   * <ol>
-   * <li>Path to Node health check script is not empty</li>
-   * <li>Node health check script file exists</li>
-   * </ol>
-   * 
-   * @param conf
-   * @return true if node health monitoring service can be started.
-   */
-  public static boolean shouldRun(Configuration conf) {
-    String nodeHealthScript = 
-      conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH);
-    if (nodeHealthScript == null || nodeHealthScript.trim().isEmpty()) {
-      return false;
-    }
-    File f = new File(nodeHealthScript);
-    return f.exists() && FileUtil.canExecute(f);
-  }
-
-  private synchronized void setHealthStatus(boolean isHealthy, String output) {
-    this.setHealthy(isHealthy);
-    this.setHealthReport(output);
-  }
-  
-  private synchronized void setHealthStatus(boolean isHealthy, String output,
-      long time) {
-    this.setHealthStatus(isHealthy, output);
-    this.setLastReportedTime(time);
-  }
-
-  /**
-   * Used only by tests to access the timer task directly
-   * @return the timer task
-   */
-  TimerTask getTimerTask() {
-    return timer;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 53cbb11..7584138 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.service.CompositeService;
+import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.ShutdownHookManager;
 import org.apache.hadoop.util.StringUtils;
@@ -181,6 +182,37 @@ public class NodeManager extends CompositeService
     }
   }
 
+  /**
+   * Creates the node health script runner described by the configuration.
+   *
+   * @param conf configuration supplying the health check script path, the
+   *          check interval, the script timeout and the script arguments
+   * @return a runner for the configured script, or {@code null} when
+   *         {@link NodeHealthScriptRunner#shouldRun(String)} rejects the
+   *         configured script path
+   */
+  public static NodeHealthScriptRunner getNodeHealthScriptRunner(Configuration conf) {
+    String nodeHealthScript =
+        conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH);
+    if (!NodeHealthScriptRunner.shouldRun(nodeHealthScript)) {
+      // Log an actionable reason; the caller receives null instead of a runner.
+      LOG.info("Node health check script is not available or is not "
+          + "executable, so the node health script runner will not be "
+          + "started.");
+      return null;
+    }
+    long nmCheckintervalTime = conf.getLong(
+        YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS,
+        YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS);
+    long scriptTimeout = conf.getLong(
+        YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS,
+        YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS);
+    String[] scriptArgs = conf.getStrings(
+        YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS, new String[] {});
+    return new NodeHealthScriptRunner(nodeHealthScript,
+        nmCheckintervalTime, scriptTimeout, scriptArgs);
+  }
+
   @Override
   protected void serviceInit(Configuration conf) throws Exception {
 
@@ -216,9 +248,11 @@ public class NodeManager extends CompositeService
     // NodeManager level dispatcher
     this.dispatcher = new AsyncDispatcher();
 
-    nodeHealthChecker = new NodeHealthCheckerService();
+    dirsHandler = new LocalDirsHandlerService();
+    nodeHealthChecker =
+        new NodeHealthCheckerService(
+            getNodeHealthScriptRunner(conf), dirsHandler);
     addService(nodeHealthChecker);
-    dirsHandler = nodeHealthChecker.getDiskHandler();
 
     this.context = createNMContext(containerTokenSecretManager,
         nmTokenSecretManager, nmStore);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
index d2caefe..c878e5c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
@@ -63,7 +63,7 @@ public class TestEventFlow {
   private static File remoteLogDir = new File("target",
       TestEventFlow.class.getName() + "-remoteLogDir").getAbsoluteFile();
   private static final long SIMULATED_RM_IDENTIFIER = 1234;
-  
+
   @Test
   public void testSuccessfulContainerLaunch() throws InterruptedException,
       IOException, YarnException {
@@ -98,9 +98,10 @@ public class TestEventFlow {
 
     DeletionService del = new DeletionService(exec);
     Dispatcher dispatcher = new AsyncDispatcher();
-    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(
+        NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
     healthChecker.init(conf);
-    LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
     NodeManagerMetrics metrics = NodeManagerMetrics.create();
     NodeStatusUpdater nodeStatusUpdater =
         new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java
index 3542196..2e41dea 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java
@@ -22,7 +22,6 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.PrintWriter;
-import java.util.TimerTask;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -30,6 +29,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.factories.RecordFactory;
@@ -40,6 +40,9 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.spy;
+
 public class TestNodeHealthService {
 
   private static volatile Log LOG = LogFactory
@@ -66,17 +69,7 @@ public class TestNodeHealthService {
           new Path(testRootDir.getAbsolutePath()), true);
     }
   }
-
-  private Configuration getConfForNodeHealthScript() {
-    Configuration conf = new Configuration();
-    conf.set(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH,
-        nodeHealthscriptFile.getAbsolutePath());
-    conf.setLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, 500);
-    conf.setLong(
-        YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, 1000);
-    return conf;
-  }
-
+  
   private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable)
           throws IOException {
     PrintWriter pw = null;
@@ -92,28 +85,14 @@ public class TestNodeHealthService {
     FileUtil.setExecutable(nodeHealthscriptFile, setExecutable);
   }
 
-  @Test
-  public void testNodeHealthScriptShouldRun() throws IOException {
-    // Node health script should not start if there is no property called
-    // node health script path.
-    Assert.assertFalse("By default Health script should not have started",
-        NodeHealthScriptRunner.shouldRun(new Configuration()));
-    Configuration conf = getConfForNodeHealthScript();
-    // Node health script should not start if the node health script does not
-    // exists
-    Assert.assertFalse("Node health script should start",
-        NodeHealthScriptRunner.shouldRun(conf));
-    // Create script path.
-    conf.writeXml(new FileOutputStream(nodeHealthConfigFile));
-    conf.addResource(nodeHealthConfigFile.getName());
-    writeNodeHealthScriptFile("", false);
-    // Node health script should not start if the node health script is not
-    // executable.
-    Assert.assertFalse("Node health script should start",
-        NodeHealthScriptRunner.shouldRun(conf));
-    writeNodeHealthScriptFile("", true);
-    Assert.assertTrue("Node health script should start",
-        NodeHealthScriptRunner.shouldRun(conf));
+  private Configuration getConfForNodeHealthScript() {
+    Configuration conf = new Configuration();
+    conf.set(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH,
+        nodeHealthscriptFile.getAbsolutePath());
+    conf.setLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, 500);
+    conf.setLong(
+        YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, 1000);
+    return conf;
   }
 
   private void setHealthStatus(NodeHealthStatus healthStatus, boolean isHealthy,
@@ -124,27 +103,24 @@ public class TestNodeHealthService {
   }
 
   @Test
-  public void testNodeHealthScript() throws Exception {
+  public void testNodeHealthService() throws Exception {
     RecordFactory factory = RecordFactoryProvider.getRecordFactory(null);
     NodeHealthStatus healthStatus =
         factory.newRecordInstance(NodeHealthStatus.class);
-    String errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
-    String normalScript = "echo \"I am all fine\"";
-    String timeOutScript = Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\""
-        : "sleep 4\necho \"I am fine\"";
     Configuration conf = getConfForNodeHealthScript();
     conf.writeXml(new FileOutputStream(nodeHealthConfigFile));
     conf.addResource(nodeHealthConfigFile.getName());
+    writeNodeHealthScriptFile("", true);
 
-    writeNodeHealthScriptFile(normalScript, true);
-    NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService();
-    nodeHealthChecker.init(conf);
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
     NodeHealthScriptRunner nodeHealthScriptRunner =
-        nodeHealthChecker.getNodeHealthScriptRunner();
-    TimerTask timerTask = nodeHealthScriptRunner.getTimerTask();
-
-    timerTask.run();
+        spy(NodeManager.getNodeHealthScriptRunner(conf));
+    NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService(
+    		nodeHealthScriptRunner, dirsHandler);
+    nodeHealthChecker.init(conf);
 
+    doReturn(true).when(nodeHealthScriptRunner).isHealthy();
+    doReturn("").when(nodeHealthScriptRunner).getHealthReport();
     setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
         nodeHealthChecker.getHealthReport(),
         nodeHealthChecker.getLastHealthReportTime());
@@ -155,11 +131,7 @@ public class TestNodeHealthService {
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
         .getHealthReport().equals(nodeHealthChecker.getHealthReport()));
 
-    // write out error file.
-    // Healthy to unhealthy transition
-    writeNodeHealthScriptFile(errorScript, true);
-    // Run timer
-    timerTask.run();
+    doReturn(false).when(nodeHealthScriptRunner).isHealthy();   
     // update health status
     setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
         nodeHealthChecker.getHealthReport(),
@@ -169,10 +141,8 @@ public class TestNodeHealthService {
         .getIsNodeHealthy());
     Assert.assertTrue("Node health status reported healthy", healthStatus
         .getHealthReport().equals(nodeHealthChecker.getHealthReport()));
-    
-    // Check unhealthy to healthy transitions.
-    writeNodeHealthScriptFile(normalScript, true);
-    timerTask.run();
+
+    doReturn(true).when(nodeHealthScriptRunner).isHealthy();
     setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
         nodeHealthChecker.getHealthReport(),
         nodeHealthChecker.getLastHealthReportTime());
@@ -184,8 +154,9 @@ public class TestNodeHealthService {
         .getHealthReport().equals(nodeHealthChecker.getHealthReport()));
 
     // Healthy to timeout transition.
-    writeNodeHealthScriptFile(timeOutScript, true);
-    timerTask.run();
+    doReturn(false).when(nodeHealthScriptRunner).isHealthy();
+    doReturn(NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG)
+        .when(nodeHealthScriptRunner).getHealthReport();
     setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
         nodeHealthChecker.getHealthReport(),
         nodeHealthChecker.getLastHealthReportTime());
@@ -198,5 +169,4 @@ public class TestNodeHealthService {
             + NodeHealthCheckerService.SEPARATOR
             + nodeHealthChecker.getDiskHandler().getDisksHealthReport(false)));
   }
-
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
index 1907e1a..8c0ceeb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.UnsupportedFileSystemException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
+import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -57,6 +58,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.LocalRMInterface;
 import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
+import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
@@ -174,9 +176,10 @@ public abstract class BaseContainerManagerTest {
     delSrvc.init(conf);
 
     exec = createContainerExecutor();
-    nodeHealthChecker = new NodeHealthCheckerService();
+    dirsHandler = new LocalDirsHandlerService();
+    nodeHealthChecker = new NodeHealthCheckerService(
+        NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
     nodeHealthChecker.init(conf);
-    dirsHandler = nodeHealthChecker.getDiskHandler();
     containerManager = createContainerManager(delSrvc);
     ((NMContext)context).setContainerManager(containerManager);
     nodeStatusUpdater.init(conf);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
index b1d4397..43100b3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
@@ -40,6 +40,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.nativeio.NativeIO;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -74,6 +75,12 @@ import com.google.inject.Module;
 
 public class TestContainerLogsPage {
 
+  private NodeHealthCheckerService createNodeHealthCheckerService(Configuration conf) {
+    NodeHealthScriptRunner scriptRunner = NodeManager.getNodeHealthScriptRunner(conf);
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+    return new NodeHealthCheckerService(scriptRunner, dirsHandler);
+  }
+
   @Test(timeout=30000)
   public void testContainerLogDirs() throws IOException, YarnException {
     File absLogDir = new File("target",
@@ -81,7 +88,7 @@ public class TestContainerLogsPage {
     String logdirwithFile = absLogDir.toURI().toString();
     Configuration conf = new Configuration();
     conf.set(YarnConfiguration.NM_LOG_DIRS, logdirwithFile);
-    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+    NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf);
     healthChecker.init(conf);
     LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
     NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler,
@@ -116,7 +123,7 @@ public class TestContainerLogsPage {
     files = ContainerLogsUtils.getContainerLogDirs(container1, user, nmContext);
     Assert.assertTrue(!(files.get(0).toString().contains("file:")));
   }
-  
+
   @Test(timeout = 10000)
   public void testContainerLogPageAccess() throws IOException {
     // SecureIOUtils require Native IO to be enabled. This test will run
@@ -137,7 +144,7 @@ public class TestContainerLogsPage {
         "kerberos");
       UserGroupInformation.setConfiguration(conf);
 
-      NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+      NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf);
       healthChecker.init(conf);
       LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
       // Add an application and the corresponding containers

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java
index a7006e0..e1845c7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java
@@ -28,6 +28,7 @@ import java.io.Writer;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -77,7 +78,13 @@ public class TestNMWebServer {
     FileUtil.fullyDelete(testRootDir);
     FileUtil.fullyDelete(testLogDir);
   }
-  
+
+  private NodeHealthCheckerService createNodeHealthCheckerService(Configuration conf) {
+    NodeHealthScriptRunner scriptRunner = NodeManager.getNodeHealthScriptRunner(conf);
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+    return new NodeHealthCheckerService(scriptRunner, dirsHandler);
+  }
+
   private int startNMWebAppServer(String webAddr) {
     Context nmContext = new NodeManager.NMContext(null, null, null, null,
         null);
@@ -106,7 +113,7 @@ public class TestNMWebServer {
     Configuration conf = new Configuration();
     conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
     conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
-    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+    NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf);
     healthChecker.init(conf);
     LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
     conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr);
@@ -169,7 +176,7 @@ public class TestNMWebServer {
     Configuration conf = new Configuration();
     conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
     conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
-    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+    NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf);
     healthChecker.init(conf);
     LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java
index 7caad4a..5a89e74 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java
@@ -36,6 +36,7 @@ import org.junit.Assert;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.util.VersionInfo;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.NodeId;
@@ -98,14 +99,16 @@ public class TestNMWebServices extends JerseyTestBase {
       TestNMWebServices.class.getSimpleName() + "LogDir");
 
   private Injector injector = Guice.createInjector(new ServletModule() {
+
     @Override
     protected void configureServlets() {
       Configuration conf = new Configuration();
       conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
       conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
-      NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+      dirsHandler = new LocalDirsHandlerService();
+      NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(
+          NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
       healthChecker.init(conf);
-      dirsHandler = healthChecker.getDiskHandler();
       aclsManager = new ApplicationACLsManager(conf);
       nmContext = new NodeManager.NMContext(null, null, dirsHandler,
           aclsManager, null);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java
index 3e7aac8..e274abb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java
@@ -33,6 +33,7 @@ import javax.xml.parsers.DocumentBuilderFactory;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -91,11 +92,14 @@ public class TestNMWebServicesApps extends JerseyTestBase {
       TestNMWebServicesApps.class.getSimpleName() + "LogDir");
 
   private Injector injector = Guice.createInjector(new ServletModule() {
+
     @Override
     protected void configureServlets() {
       conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
       conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
-      NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+      LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+      NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(
+          NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
       healthChecker.init(conf);
       dirsHandler = healthChecker.getDiskHandler();
       aclsManager = new ApplicationACLsManager(conf);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/d4ac6822/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java
index ceb1d57..3bfd440 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java
@@ -34,6 +34,7 @@ import javax.xml.parsers.DocumentBuilderFactory;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -123,7 +124,9 @@ public class TestNMWebServicesContainers extends JerseyTestBase {
       };
       conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
       conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
-      NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+      LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+      NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(
+          NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
       healthChecker.init(conf);
       dirsHandler = healthChecker.getDiskHandler();
       aclsManager = new ApplicationACLsManager(conf);