You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ac...@apache.org on 2012/08/30 04:23:14 UTC
svn commit: r1378790 - in /hadoop/common/branches/branch-1-win: ./
src/core/org/apache/hadoop/util/ src/mapred/org/apache/hadoop/mapred/
src/test/org/apache/hadoop/mapred/
Author: acmurthy
Date: Thu Aug 30 02:23:14 2012
New Revision: 1378790
URL: http://svn.apache.org/viewvc?rev=1378790&view=rev
Log:
MAPREDUCE-4598. Add support for NodeHealthCheck script for Windows. Contributed by Bikas Saha.
Modified:
hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt
hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java
hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java
hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java
hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java
hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java
Modified: hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt (original)
+++ hadoop/common/branches/branch-1-win/CHANGES.branch-1-win.txt Thu Aug 30 02:23:14 2012
@@ -63,6 +63,9 @@ branch-hadoop-1-win - unreleased
HADOOP-8544 Move an assertion location in 'winutils chmod' (Chuan Liu via sanjay)
+ MAPREDUCE-4598. Add support for NodeHealthCheck script for Windows. (Bikas
+ Saha via acmurthy)
+
BUG FIXES
HDFS-6527. Backport HADOOP-7389: Use of TestingGroups by tests causes
Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/ProcessTree.java Thu Aug 30 02:23:14 2012
@@ -169,12 +169,8 @@ public class ProcessTree {
return;
}
- String[] args = null;
- if(Shell.WINDOWS){
- args = new String[] {Shell.WINUTILS, "task", "kill", pgrpId};
- } else {
- args = new String[] { "kill", "-" + signal.getValue() , "-"+pgrpId };
- }
+ String[] args =
+ Shell.getSignalKillProcessGroupCommand(signal.getValue(), pgrpId);
ShellCommandExecutor shexec = new ShellCommandExecutor(args);
try {
shexec.execute();
Modified: hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java (original)
+++ hadoop/common/branches/branch-1-win/src/core/org/apache/hadoop/util/Shell.java Thu Aug 30 02:23:14 2012
@@ -88,6 +88,34 @@ abstract public class Shell {
return (WINDOWS) ? new String[] { WINUTILS, "chown", owner }
: new String[] { "chown", owner };
}
+
+ /** Return a command to execute the given command in OS shell.
+ * On Windows, the passed in groupId can be used to launch
+ * and associate the given groupId in a process group. On
+ * non-Windows, groupId is ignored. */
+ public static String[] getRunCommand(String command,
+ String groupId) {
+ if (WINDOWS) {
+ if(ProcessTree.isSetsidAvailable) {
+ return new String[] { Shell.WINUTILS, "task", "create",
+ groupId, "cmd /c " + command };
+ } else {
+ return new String[] { "cmd", "/c", command };
+ }
+ } else {
+ return new String[] { "bash", "-c", command };
+ }
+ }
+
+ /** Return a command to send a kill signal to a given groupId */
+ public static String[] getSignalKillProcessGroupCommand(int code,
+ String groupId) {
+ if (WINDOWS) {
+ return new String[] { Shell.WINUTILS, "task", "kill", groupId };
+ } else {
+ return new String[] { "kill", "-" + code , "-" + groupId };
+ }
+ }
/**Time after which the executing script would be timedout*/
protected long timeOutInterval = 0L;
@@ -393,7 +421,6 @@ abstract public class Shell {
}
timeOutInterval = timeout;
}
-
/** Execute the shell command. */
public void execute() throws IOException {
Modified: hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java (original)
+++ hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/DefaultTaskController.java Thu Aug 30 02:23:14 2012
@@ -126,18 +126,7 @@ public class DefaultTaskController exten
String commandFile = writeCommand(cmdLine, rawFs, p).getAbsolutePath();
rawFs.setPermission(p, TaskController.TASK_LAUNCH_SCRIPT_PERMISSION);
- String[] commandArray = null;
- if(Shell.WINDOWS) {
- if(ProcessTree.isSetsidAvailable) {
- commandArray = new String[] { Shell.WINUTILS, "task", "create",
- attemptId, "cmd /c " + commandFile };
- }
- else {
- commandArray = new String[]{ "cmd", "/c", commandFile};
- }
- } else {
- commandArray = new String[] { "bash", "-c", commandFile };
- }
+ String[] commandArray = Shell.getRunCommand(commandFile, attemptId);
shExec = new ShellCommandExecutor(commandArray, currentWorkDirectory);
shExec.execute();
} catch (Exception e) {
Modified: hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java (original)
+++ hadoop/common/branches/branch-1-win/src/mapred/org/apache/hadoop/mapred/NodeHealthCheckerService.java Thu Aug 30 02:23:14 2012
@@ -28,7 +28,13 @@ import java.util.TimerTask;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.mapred.TaskTrackerStatus.TaskTrackerHealthStatus;
+import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Shell.ExitCodeException;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
@@ -106,13 +112,15 @@ class NodeHealthCheckerService {
String exceptionStackTrace = "";
public NodeHealthMonitorExecutor(String[] args) {
- ArrayList<String> execScript = new ArrayList<String>();
- execScript.add(nodeHealthScript);
+ final String windowsGroupId =
+ "_HadoopWindowsNodeHealthCheckerService_" +
+ Long.toString(System.currentTimeMillis()); // avoid name collisions
+ String scriptCommand = nodeHealthScript;
if (args != null) {
- execScript.addAll(Arrays.asList(args));
+ scriptCommand = nodeHealthScript + " " + StringUtils.join(" ", args);
}
- shexec = new ShellCommandExecutor((String[]) execScript
- .toArray(new String[execScript.size()]), null, null, scriptTimeout);
+ String[] command = Shell.getRunCommand(scriptCommand, windowsGroupId);
+ shexec = new ShellCommandExecutor(command, null, null, scriptTimeout);
}
@Override
@@ -125,13 +133,15 @@ class NodeHealthCheckerService {
status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE;
} catch (Exception e) {
LOG.warn("Caught exception : " + e.getMessage());
- if (!shexec.isTimedOut()) {
- status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
- } else {
- status = HealthCheckerExitStatus.TIMED_OUT;
- }
+ status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
exceptionStackTrace = StringUtils.stringifyException(e);
} finally {
+ // Check for timeout in the finally block as the exception is not
+ // always thrown when the script is killed
+ if (shexec.isTimedOut()) {
+ status = HealthCheckerExitStatus.TIMED_OUT;
+ }
+
if (status == HealthCheckerExitStatus.SUCCESS) {
if (hasErrors(shexec.getOutput())) {
status = HealthCheckerExitStatus.FAILED;
@@ -327,8 +337,22 @@ class NodeHealthCheckerService {
if (nodeHealthScript == null || nodeHealthScript.trim().isEmpty()) {
return false;
}
- File f = new File(nodeHealthScript);
- return f.exists() && f.canExecute();
+
+ Path path = new Path(nodeHealthScript);
+ try {
+ // Use LocalFileSystem to check if the script exists and if it
+ // is executable as Java's API does not work well cross-platform
+ FileSystem localFS = FileSystem.getLocal(conf);
+ FileStatus fs = localFS.getFileStatus(path);
+ FsPermission fsPerm = fs.getPermission();
+ return (localFS.exists(path)
+ && fsPerm.getUserAction().implies(FsAction.EXECUTE));
+ } catch (IOException ex) {
+ LOG.warn("Failed to extract file status " + path
+ + ". Node health script will not be allowed to run");
+ }
+
+ return false;
}
private synchronized void setHealthStatus(boolean isHealthy, String output) {
Modified: hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java?rev=1378790&r1=1378789&r2=1378790&view=diff
==============================================================================
--- hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java (original)
+++ hadoop/common/branches/branch-1-win/src/test/org/apache/hadoop/mapred/TestNodeHealthService.java Thu Aug 30 02:23:14 2012
@@ -27,7 +27,9 @@ import java.util.TimerTask;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.mapred.TaskTrackerStatus.TaskTrackerHealthStatus;
+import org.apache.hadoop.util.Shell;
import junit.framework.TestCase;
@@ -45,7 +47,8 @@ public class TestNodeHealthService exten
private String testRootDir = new File(System.getProperty("test.build.data",
"/tmp")).getAbsolutePath();
- private File nodeHealthscriptFile = new File(testRootDir, "failingscript.sh");
+ private File nodeHealthscriptFile = new File(testRootDir,
+ Shell.WINDOWS ? "failingscript.cmd" : "failingscript.sh");
@Override
protected void tearDown() throws Exception {
@@ -69,15 +72,17 @@ public class TestNodeHealthService exten
}
private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable)
- throws IOException {
+ throws IOException, InterruptedException {
PrintWriter pw = new PrintWriter(new FileOutputStream(nodeHealthscriptFile));
pw.println(scriptStr);
pw.flush();
pw.close();
- nodeHealthscriptFile.setExecutable(setExecutable);
+ FileUtil.chmod(nodeHealthscriptFile.getAbsolutePath(),
+ (setExecutable ? "+x" : "-x"));
}
- public void testNodeHealthScriptShouldRun() throws IOException {
+ public void testNodeHealthScriptShouldRun()
+ throws IOException, InterruptedException {
// Node health script should not start if there is no property called
// node health script path.
assertFalse("Health checker should not have started",
@@ -103,7 +108,7 @@ public class TestNodeHealthService exten
TaskTrackerHealthStatus healthStatus = new TaskTrackerHealthStatus();
String errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
String normalScript = "echo \"I am all fine\"";
- String timeOutScript = "sleep 4\n echo\"I am fine\"";
+ String timeOutScript = "sleep 4\n echo \"I am fine\"";
Configuration conf = getConfForNodeHealthScript();
conf.writeXml(new FileOutputStream(nodeHealthConfigFile));