You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ac...@apache.org on 2012/08/30 04:23:14 UTC

svn commit: r1378790 - in /hadoop/common/branches/branch-1-win: ./ src/core/org/apache/hadoop/util/ src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/mapred/

Author: acmurthy
Date: Thu Aug 30 02:23:14 2012
New Revision: 1378790

URL: http://svn.apache.org/viewvc?rev=1378790&view=rev
Log:
MAPREDUCE-4598. Add support for NodeHealthCheck script for Windows. Contributed by Bikas Saha.

Modified:
    hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt
    hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java
    hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java
    hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java
    hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java
    hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java

Modified: hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt (original)
+++ hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt Thu Aug 30 02:23:14 2012
@@ -63,6 +63,9 @@ branch-hadoop-1-win - unreleased
 
     HADOOP-8544 Move an assertion location in 'winutils chmod' (Chuan Liu via sanjay)
 
+    MAPREDUCE-4598. Add support for NodeHealthCheck script for Windows. (Bikas
+    Saha via acmurthy)
+
 BUG FIXES
 
     HDFS-6527. Backport HADOOP-7389: Use of TestingGroups by tests causes

Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java Thu Aug 30 02:23:14 2012
@@ -169,12 +169,8 @@ public class ProcessTree {
       return;
     }
 
-    String[] args = null;
-    if(Shell.WINDOWS){
-      args = new String[] {Shell.WINUTILS, "task", "kill", pgrpId};
-    } else {
-      args = new String[] { "kill", "-" + signal.getValue() , "-"+pgrpId };
-    }
+    String[] args =
+      Shell.getSignalKillProcessGroupCommand(signal.getValue(), pgrpId);
     ShellCommandExecutor shexec = new ShellCommandExecutor(args);
     try {
       shexec.execute();

Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java Thu Aug 30 02:23:14 2012
@@ -88,6 +88,34 @@ abstract public class Shell {
     return (WINDOWS) ? new String[] { WINUTILS, "chown", owner }
                      : new String[] { "chown", owner };
   }
+  
+  /** Return a command to execute the given command in OS shell.
+   *  On Windows, the passed in groupId can be used to launch
+   *  and associate the given groupId in a process group. On
+   *  non-Windows, groupId is ignored. */
+  public static String[] getRunCommand(String command,
+                                       String groupId) {
+    if (WINDOWS) {
+      if(ProcessTree.isSetsidAvailable) {
+        return new String[] { Shell.WINUTILS, "task", "create",
+                              groupId, "cmd /c " + command };
+      } else {
+        return new String[] { "cmd", "/c", command };
+      }
+    } else {
+      return new String[] { "bash", "-c", command };
+    }
+  }
+
+  /** Return a command to send a kill signal to a given groupId */
+  public static String[] getSignalKillProcessGroupCommand(int code,
+                                                          String groupId) {
+    if (WINDOWS) {
+      return new String[] { Shell.WINUTILS, "task", "kill", groupId };
+    } else {
+      return new String[] { "kill", "-" + code , "-" + groupId };
+    }
+  }
 
   /**Time after which the executing script would be timedout*/
   protected long timeOutInterval = 0L;
@@ -393,7 +421,6 @@ abstract public class Shell {
       }
       timeOutInterval = timeout;
     }
-        
 
     /** Execute the shell command. */
     public void execute() throws IOException {

Modified: hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java (original)
+++ hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java Thu Aug 30 02:23:14 2012
@@ -126,18 +126,7 @@ public class DefaultTaskController exten
 
       String commandFile = writeCommand(cmdLine, rawFs, p).getAbsolutePath();
       rawFs.setPermission(p, TaskController.TASK_LAUNCH_SCRIPT_PERMISSION);
-      String[] commandArray = null;
-      if(Shell.WINDOWS) {
-        if(ProcessTree.isSetsidAvailable) {
-          commandArray = new String[] { Shell.WINUTILS, "task", "create",
-              attemptId, "cmd /c " + commandFile };
-        }
-        else {
-          commandArray = new String[]{ "cmd", "/c", commandFile};
-        }
-      } else {
-        commandArray = new String[] { "bash", "-c", commandFile };
-      }
+      String[] commandArray = Shell.getRunCommand(commandFile, attemptId);
       shExec = new ShellCommandExecutor(commandArray, currentWorkDirectory);
       shExec.execute();
     } catch (Exception e) {

Modified: hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java (original)
+++ hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java Thu Aug 30 02:23:14 2012
@@ -28,7 +28,13 @@ import java.util.TimerTask;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.mapred.TaskTrackerStatus.TaskTrackerHealthStatus;
+import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
@@ -106,13 +112,15 @@ class NodeHealthCheckerService {
     String exceptionStackTrace = "";
 
     public NodeHealthMonitorExecutor(String[] args) {
-      ArrayList<String> execScript = new ArrayList<String>();
-      execScript.add(nodeHealthScript);
+      final String windowsGroupId = 
+          "_HadoopWindowsNodeHealthCheckerService_" + 
+          Long.toString(System.currentTimeMillis()); // avoid name collisions
+      String scriptCommand = nodeHealthScript;
       if (args != null) {
-        execScript.addAll(Arrays.asList(args));
+        scriptCommand = nodeHealthScript + " " + StringUtils.join(" ", args);
       }
-      shexec = new ShellCommandExecutor((String[]) execScript
-          .toArray(new String[execScript.size()]), null, null, scriptTimeout);
+      String[] command = Shell.getRunCommand(scriptCommand, windowsGroupId);
+      shexec = new ShellCommandExecutor(command, null, null, scriptTimeout);
     }
 
     @Override
@@ -125,13 +133,15 @@ class NodeHealthCheckerService {
         status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE;
       } catch (Exception e) {
         LOG.warn("Caught exception : " + e.getMessage());
-        if (!shexec.isTimedOut()) {
-          status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
-        } else {
-          status = HealthCheckerExitStatus.TIMED_OUT;
-        }
+        status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
         exceptionStackTrace = StringUtils.stringifyException(e);
       } finally {
+        // Check for timeout in the finally block as the exception is not
+        // always thrown when the script is killed
+        if (shexec.isTimedOut()) {
+          status = HealthCheckerExitStatus.TIMED_OUT;
+        }
+
         if (status == HealthCheckerExitStatus.SUCCESS) {
           if (hasErrors(shexec.getOutput())) {
             status = HealthCheckerExitStatus.FAILED;
@@ -327,8 +337,22 @@ class NodeHealthCheckerService {
     if (nodeHealthScript == null || nodeHealthScript.trim().isEmpty()) {
       return false;
     }
-    File f = new File(nodeHealthScript);
-    return f.exists() && f.canExecute();
+
+    Path path = new Path(nodeHealthScript);
+    try {
+      // Use LocalFileSystem to check if the script exists and if it
+      // is executable as Java's API does not work well cross-platform
+      FileSystem localFS = FileSystem.getLocal(conf);
+      FileStatus fs = localFS.getFileStatus(path);
+      FsPermission fsPerm = fs.getPermission();
+      return (localFS.exists(path)
+              && fsPerm.getUserAction().implies(FsAction.EXECUTE));
+    } catch (IOException ex) {
+      LOG.warn("Failed to extract file status " + path
+               + ". Node health script will not be allowed to run");
+    }
+    
+    return false;
   }
 
   private synchronized void setHealthStatus(boolean isHealthy, String output) {

Modified: hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java (original)
+++ hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java Thu Aug 30 02:23:14 2012
@@ -27,7 +27,9 @@ import java.util.TimerTask;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.mapred.TaskTrackerStatus.TaskTrackerHealthStatus;
+import org.apache.hadoop.util.Shell;
 
 import junit.framework.TestCase;
 
@@ -45,7 +47,8 @@ public class TestNodeHealthService exten
   private String testRootDir = new File(System.getProperty("test.build.data",
       "/tmp")).getAbsolutePath();
 
-  private File nodeHealthscriptFile = new File(testRootDir, "failingscript.sh");
+  private File nodeHealthscriptFile = new File(testRootDir,
+      Shell.WINDOWS ? "failingscript.cmd" : "failingscript.sh");
 
   @Override
   protected void tearDown() throws Exception {
@@ -69,15 +72,17 @@ public class TestNodeHealthService exten
   }
 
   private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable)
-      throws IOException {
+      throws IOException, InterruptedException {
     PrintWriter pw = new PrintWriter(new FileOutputStream(nodeHealthscriptFile));
     pw.println(scriptStr);
     pw.flush();
     pw.close();
-    nodeHealthscriptFile.setExecutable(setExecutable);
+    FileUtil.chmod(nodeHealthscriptFile.getAbsolutePath(),
+                  (setExecutable ? "+x" : "-x"));
   }
 
-  public void testNodeHealthScriptShouldRun() throws IOException {
+  public void testNodeHealthScriptShouldRun()
+      throws IOException, InterruptedException {
     // Node health script should not start if there is no property called
     // node health script path.
     assertFalse("Health checker should not have started",
@@ -103,7 +108,7 @@ public class TestNodeHealthService exten
     TaskTrackerHealthStatus healthStatus = new TaskTrackerHealthStatus();
     String errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
     String normalScript = "echo \"I am all fine\"";
-    String timeOutScript = "sleep 4\n echo\"I am fine\"";
+    String timeOutScript = "sleep 4\n echo \"I am fine\"";
     Configuration conf = getConfForNodeHealthScript();
     conf.writeXml(new FileOutputStream(nodeHealthConfigFile));