You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by ma...@apache.org on 2011/11/30 00:28:20 UTC
svn commit: r1208135 [1/3] - in
/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project: ./ conf/
hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/
hadoop-mapreduce-client/hadoop-mapreduce-client-jobcli...
Author: mahadev
Date: Tue Nov 29 23:28:16 2011
New Revision: 1208135
URL: http://svn.apache.org/viewvc?rev=1208135&view=rev
Log:
MAPREDUCE-3121. NodeManager should handle disk-failures (Ravi Gummadi via mahadev) - Merging r1208131 from trunk.
Added:
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
- copied unchanged from r1208131, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
- copied unchanged from r1208131, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java
- copied unchanged from r1208131, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java
- copied unchanged from r1208131, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java
- copied unchanged from r1208131, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
- copied unchanged from r1208131, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
Removed:
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java
Modified:
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/conf/container-executor.cfg
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerExitEvent.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/NonAggregatingLogHandler.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/TestNonAggregatingLogHandler.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt Tue Nov 29 23:28:16 2011
@@ -6,6 +6,8 @@ Release 0.23.1 - Unreleased
NEW FEATURES
+ MAPREDUCE-3121. NodeManager should handle disk-failures (Ravi Gummadi via mahadev)
+
IMPROVEMENTS
MAPREDUCE-3375. [Gridmix] Memory Emulation system tests.
(Vinay Thota via amarrk)
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/conf/container-executor.cfg
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/conf/container-executor.cfg?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/conf/container-executor.cfg (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/conf/container-executor.cfg Tue Nov 29 23:28:16 2011
@@ -1,3 +1,3 @@
-yarn.nodemanager.local-dirs=#configured value of yarn.nodemanager.local-dirs. It can be a list of comma separated paths.
-yarn.nodemanager.log-dirs=#configured value of yarn.nodemanager.log-dirs.
yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group
+banned.users=#comma separated list of users who can not run applications
+min.user.id=1000#Prevent other super-users
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java Tue Nov 29 23:28:16 2011
@@ -113,9 +113,10 @@ class LocalDistributedCacheManager {
Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
ExecutorService exec = Executors.newCachedThreadPool();
+ Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
for (LocalResource resource : localResources.values()) {
Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
- localDirAllocator, resource, new Random());
+ destPath, resource, new Random());
Future<Path> future = exec.submit(download);
resourcesToPaths.put(resource, future);
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java Tue Nov 29 23:28:16 2011
@@ -56,7 +56,7 @@ public class MiniMRYarnCluster extends M
}
public MiniMRYarnCluster(String testName, int noOfNMs) {
- super(testName, noOfNMs);
+ super(testName, noOfNMs, 4, 4);
//TODO: add the history server
historyServerWrapper = new JobHistoryServerWrapper();
addService(historyServerWrapper);
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java Tue Nov 29 23:28:16 2011
@@ -43,7 +43,8 @@ public class TestDistributedShell {
public static void setup() throws InterruptedException, IOException {
LOG.info("Starting up YARN cluster");
if (yarnCluster == null) {
- yarnCluster = new MiniYARNCluster(TestDistributedShell.class.getName());
+ yarnCluster = new MiniYARNCluster(TestDistributedShell.class.getName(),
+ 1, 1, 1);
yarnCluster.init(conf);
yarnCluster.start();
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java Tue Nov 29 23:28:16 2011
@@ -351,13 +351,39 @@ public class YarnConfiguration extends C
/** Class that calculates containers current resource utilization.*/
public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR =
NM_PREFIX + "container-monitor.resource-calculator.class";
-
+
+ /**
+ * Enable/Disable disks' health checker. Default is true.
+ * An expert level configuration property.
+ */
+ public static final String NM_DISK_HEALTH_CHECK_ENABLE =
+ NM_PREFIX + "disk-health-checker.enable";
+ /** Frequency of running disks' health checker.*/
+ public static final String NM_DISK_HEALTH_CHECK_INTERVAL_MS =
+ NM_PREFIX + "disk-health-checker.interval-ms";
+ /** By default, disks' health is checked every 2 minutes. */
+ public static final long DEFAULT_NM_DISK_HEALTH_CHECK_INTERVAL_MS =
+ 2 * 60 * 1000;
+
+ /**
+ * The minimum fraction of number of disks to be healthy for the nodemanager
+ * to launch new containers. This applies to nm-local-dirs and nm-log-dirs.
+ */
+ public static final String NM_MIN_HEALTHY_DISKS_FRACTION =
+ NM_PREFIX + "disk-health-checker.min-healthy-disks";
+ /**
+ * By default, at least 25% of disks are to be healthy to say that the node
+ * is healthy in terms of disks.
+ */
+ public static final float DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION
+ = 0.25F;
+
/** Frequency of running node health script.*/
public static final String NM_HEALTH_CHECK_INTERVAL_MS =
NM_PREFIX + "health-checker.interval-ms";
public static final long DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS = 10 * 60 * 1000;
-
- /** Script time out period.*/
+
+ /** Health check script time out period.*/
public static final String NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS =
NM_PREFIX + "health-checker.script.timeout-ms";
public static final long DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS =
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java Tue Nov 29 23:28:16 2011
@@ -31,6 +31,7 @@ import java.io.Writer;
import java.security.PrivilegedExceptionAction;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -105,12 +106,12 @@ public class AggregatedLogFormat {
public static class LogValue {
- private final String[] rootLogDirs;
+ private final List<String> rootLogDirs;
private final ContainerId containerId;
// TODO Maybe add a version string here. Instead of changing the version of
// the entire k-v format
- public LogValue(String[] rootLogDirs, ContainerId containerId) {
+ public LogValue(List<String> rootLogDirs, ContainerId containerId) {
this.rootLogDirs = rootLogDirs;
this.containerId = containerId;
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java Tue Nov 29 23:28:16 2011
@@ -33,7 +33,6 @@ import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
@@ -56,7 +55,10 @@ public class FSDownload implements Calla
private final UserGroupInformation userUgi;
private Configuration conf;
private LocalResource resource;
- private LocalDirAllocator dirs;
+
+ /** The local FS dir path under which this resource is to be localized. */
+ private Path destDirPath;
+
private static final FsPermission cachePerms = new FsPermission(
(short) 0755);
static final FsPermission PUBLIC_FILE_PERMS = new FsPermission((short) 0555);
@@ -65,10 +67,11 @@ public class FSDownload implements Calla
static final FsPermission PUBLIC_DIR_PERMS = new FsPermission((short) 0755);
static final FsPermission PRIVATE_DIR_PERMS = new FsPermission((short) 0700);
+
public FSDownload(FileContext files, UserGroupInformation ugi, Configuration conf,
- LocalDirAllocator dirs, LocalResource resource, Random rand) {
+ Path destDirPath, LocalResource resource, Random rand) {
this.conf = conf;
- this.dirs = dirs;
+ this.destDirPath = destDirPath;
this.files = files;
this.userUgi = ugi;
this.resource = resource;
@@ -136,15 +139,13 @@ public class FSDownload implements Calla
}
Path tmp;
- Path dst =
- dirs.getLocalPathForWrite(".", getEstimatedSize(resource),
- conf);
do {
- tmp = new Path(dst, String.valueOf(rand.nextLong()));
+ tmp = new Path(destDirPath, String.valueOf(rand.nextLong()));
} while (files.util().exists(tmp));
- dst = tmp;
- files.mkdir(dst, cachePerms, false);
- final Path dst_work = new Path(dst + "_tmp");
+ destDirPath = tmp;
+
+ files.mkdir(destDirPath, cachePerms, false);
+ final Path dst_work = new Path(destDirPath + "_tmp");
files.mkdir(dst_work, cachePerms, false);
Path dFinal = files.makeQualified(new Path(dst_work, sCopy.getName()));
@@ -158,9 +159,9 @@ public class FSDownload implements Calla
});
unpack(new File(dTmp.toUri()), new File(dFinal.toUri()));
changePermissions(dFinal.getFileSystem(conf), dFinal);
- files.rename(dst_work, dst, Rename.OVERWRITE);
+ files.rename(dst_work, destDirPath, Rename.OVERWRITE);
} catch (Exception e) {
- try { files.delete(dst, true); } catch (IOException ignore) { }
+ try { files.delete(destDirPath, true); } catch (IOException ignore) { }
throw e;
} finally {
try {
@@ -170,9 +171,8 @@ public class FSDownload implements Calla
rand = null;
conf = null;
resource = null;
- dirs = null;
}
- return files.makeQualified(new Path(dst, sCopy.getName()));
+ return files.makeQualified(new Path(destDirPath, sCopy.getName()));
}
/**
@@ -221,17 +221,4 @@ public class FSDownload implements Calla
}
}
- private static long getEstimatedSize(LocalResource rsrc) {
- if (rsrc.getSize() < 0) {
- return -1;
- }
- switch (rsrc.getType()) {
- case ARCHIVE:
- return 5 * rsrc.getSize();
- case FILE:
- default:
- return rsrc.getSize();
- }
- }
-
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java Tue Nov 29 23:28:16 2011
@@ -146,13 +146,14 @@ public class TestFSDownload {
vis = LocalResourceVisibility.APPLICATION;
break;
}
-
- LocalResource rsrc = createFile(files, new Path(basedir, "" + i),
- sizes[i], rand, vis);
+ Path p = new Path(basedir, "" + i);
+ LocalResource rsrc = createFile(files, p, sizes[i], rand, vis);
rsrcVis.put(rsrc, vis);
+ Path destPath = dirs.getLocalPathForWrite(
+ basedir.toString(), sizes[i], conf);
FSDownload fsd =
new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
- dirs, rsrc, new Random(sharedSeed));
+ destPath, rsrc, new Random(sharedSeed));
pending.put(rsrc, exec.submit(fsd));
}
@@ -249,13 +250,15 @@ public class TestFSDownload {
vis = LocalResourceVisibility.APPLICATION;
break;
}
-
- LocalResource rsrc = createJar(files, new Path(basedir, "dir" + i
- + ".jar"), vis);
+
+ Path p = new Path(basedir, "dir" + i + ".jar");
+ LocalResource rsrc = createJar(files, p, vis);
rsrcVis.put(rsrc, vis);
+ Path destPath = dirs.getLocalPathForWrite(
+ basedir.toString(), conf);
FSDownload fsd =
new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
- dirs, rsrc, new Random(sharedSeed));
+ destPath, rsrc, new Random(sharedSeed));
pending.put(rsrc, exec.submit(fsd));
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml Tue Nov 29 23:28:16 2011
@@ -389,6 +389,22 @@
</property>
<property>
+ <description>Frequency of running disk health checker code.</description>
+ <name>yarn.nodemanager.disk-health-checker.interval-ms</name>
+ <value>120000</value>
+ </property>
+
+ <property>
+ <description>The minimum fraction of number of disks to be healthy for the
+ nodemanager to launch new containers. This corresponds to both
+ yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs, i.e. if there
+ are fewer healthy local-dirs (or log-dirs) available, then
+ new containers will not be launched on this node.</description>
+ <name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name>
+ <value>0.25</value>
+ </property>
+
+ <property>
<description>The path to the Linux container executor.</description>
<name>yarn.nodemanager.linux-container-executor.path</name>
</property>
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java Tue Nov 29 23:28:16 2011
@@ -45,6 +45,7 @@ public abstract class ContainerExecutor
FsPermission.createImmutable((short) 0700);
private Configuration conf;
+
private ConcurrentMap<ContainerId, Path> pidFiles =
new ConcurrentHashMap<ContainerId, Path>();
@@ -68,7 +69,7 @@ public abstract class ContainerExecutor
* @throws IOException
*/
public abstract void init() throws IOException;
-
+
/**
* Prepare the environment for containers in this application to execute.
* For $x in local.dirs
@@ -82,12 +83,14 @@ public abstract class ContainerExecutor
* @param appId id of the application
* @param nmPrivateContainerTokens path to localized credentials, rsrc by NM
* @param nmAddr RPC address to contact NM
+ * @param localDirs nm-local-dirs
+ * @param logDirs nm-log-dirs
* @throws IOException For most application init failures
* @throws InterruptedException If application init thread is halted by NM
*/
public abstract void startLocalizer(Path nmPrivateContainerTokens,
InetSocketAddress nmAddr, String user, String appId, String locId,
- List<Path> localDirs)
+ List<String> localDirs, List<String> logDirs)
throws IOException, InterruptedException;
@@ -100,12 +103,15 @@ public abstract class ContainerExecutor
* @param user the user of the container
* @param appId the appId of the container
* @param containerWorkDir the work dir for the container
+ * @param localDirs nm-local-dirs to be used for this container
+ * @param logDirs nm-log-dirs to be used for this container
* @return the return status of the launch
* @throws IOException
*/
public abstract int launchContainer(Container container,
Path nmPrivateContainerScriptPath, Path nmPrivateTokensPath,
- String user, String appId, Path containerWorkDir) throws IOException;
+ String user, String appId, Path containerWorkDir, List<String> localDirs,
+ List<String> logDirs) throws IOException;
public abstract boolean signalContainer(String user, String pid,
Signal signal)
@@ -116,7 +122,8 @@ public abstract class ContainerExecutor
public enum ExitCode {
FORCE_KILLED(137),
- TERMINATED(143);
+ TERMINATED(143),
+ DISKS_FAILED(-101);
private final int code;
private ExitCode(int exitCode) {
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java Tue Nov 29 23:28:16 2011
@@ -26,6 +26,7 @@ import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.InetSocketAddress;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
@@ -39,7 +40,6 @@ import org.apache.hadoop.fs.permission.F
import org.apache.hadoop.util.Shell.ExitCodeException;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
@@ -77,16 +77,17 @@ public class DefaultContainerExecutor ex
@Override
public void startLocalizer(Path nmPrivateContainerTokensPath,
InetSocketAddress nmAddr, String user, String appId, String locId,
- List<Path> localDirs) throws IOException, InterruptedException {
+ List<String> localDirs, List<String> logDirs)
+ throws IOException, InterruptedException {
ContainerLocalizer localizer =
- new ContainerLocalizer(this.lfs, user, appId, locId,
- localDirs, RecordFactoryProvider.getRecordFactory(getConf()));
+ new ContainerLocalizer(lfs, user, appId, locId, getPaths(localDirs),
+ RecordFactoryProvider.getRecordFactory(getConf()));
createUserLocalDirs(localDirs, user);
createUserCacheDirs(localDirs, user);
createAppDirs(localDirs, user, appId);
- createAppLogDirs(appId);
+ createAppLogDirs(appId, logDirs);
// TODO: Why pick first app dir. The same in LCE why not random?
Path appStorageDir = getFirstApplicationDir(localDirs, user, appId);
@@ -104,8 +105,8 @@ public class DefaultContainerExecutor ex
@Override
public int launchContainer(Container container,
Path nmPrivateContainerScriptPath, Path nmPrivateTokensPath,
- String userName, String appId, Path containerWorkDir)
- throws IOException {
+ String userName, String appId, Path containerWorkDir,
+ List<String> localDirs, List<String> logDirs) throws IOException {
ContainerId containerId = container.getContainerID();
@@ -115,10 +116,7 @@ public class DefaultContainerExecutor ex
ConverterUtils.toString(
container.getContainerID().getApplicationAttemptId().
getApplicationId());
- String[] sLocalDirs = getConf().getStrings(
- YarnConfiguration.NM_LOCAL_DIRS,
- YarnConfiguration.DEFAULT_NM_LOCAL_DIRS);
- for (String sLocalDir : sLocalDirs) {
+ for (String sLocalDir : localDirs) {
Path usersdir = new Path(sLocalDir, ContainerLocalizer.USERCACHE);
Path userdir = new Path(usersdir, userName);
Path appCacheDir = new Path(userdir, ContainerLocalizer.APPCACHE);
@@ -128,7 +126,7 @@ public class DefaultContainerExecutor ex
}
// Create the container log-dirs on all disks
- createContainerLogDirs(appIdStr, containerIdStr);
+ createContainerLogDirs(appIdStr, containerIdStr, logDirs);
// copy launch script to work dir
Path launchDst =
@@ -299,9 +297,9 @@ public class DefaultContainerExecutor ex
* $logdir/$user/$appId */
private static final short LOGDIR_PERM = (short)0710;
- private Path getFirstApplicationDir(List<Path> localDirs, String user,
+ private Path getFirstApplicationDir(List<String> localDirs, String user,
String appId) {
- return getApplicationDir(localDirs.get(0), user, appId);
+ return getApplicationDir(new Path(localDirs.get(0)), user, appId);
}
private Path getApplicationDir(Path base, String user, String appId) {
@@ -328,14 +326,14 @@ public class DefaultContainerExecutor ex
* <li>$local.dir/usercache/$user</li>
* </ul>
*/
- private void createUserLocalDirs(List<Path> localDirs, String user)
+ private void createUserLocalDirs(List<String> localDirs, String user)
throws IOException {
boolean userDirStatus = false;
FsPermission userperms = new FsPermission(USER_PERM);
- for (Path localDir : localDirs) {
+ for (String localDir : localDirs) {
// create $local.dir/usercache/$user and its immediate parent
try {
- lfs.mkdir(getUserCacheDir(localDir, user), userperms, true);
+ lfs.mkdir(getUserCacheDir(new Path(localDir), user), userperms, true);
} catch (IOException e) {
LOG.warn("Unable to create the user directory : " + localDir, e);
continue;
@@ -357,7 +355,7 @@ public class DefaultContainerExecutor ex
* <li>$local.dir/usercache/$user/filecache</li>
* </ul>
*/
- private void createUserCacheDirs(List<Path> localDirs, String user)
+ private void createUserCacheDirs(List<String> localDirs, String user)
throws IOException {
LOG.info("Initializing user " + user);
@@ -366,9 +364,10 @@ public class DefaultContainerExecutor ex
FsPermission appCachePerms = new FsPermission(APPCACHE_PERM);
FsPermission fileperms = new FsPermission(FILECACHE_PERM);
- for (Path localDir : localDirs) {
+ for (String localDir : localDirs) {
// create $local.dir/usercache/$user/appcache
- final Path appDir = getAppcacheDir(localDir, user);
+ Path localDirPath = new Path(localDir);
+ final Path appDir = getAppcacheDir(localDirPath, user);
try {
lfs.mkdir(appDir, appCachePerms, true);
appcacheDirStatus = true;
@@ -376,7 +375,7 @@ public class DefaultContainerExecutor ex
LOG.warn("Unable to create app cache directory : " + appDir, e);
}
// create $local.dir/usercache/$user/filecache
- final Path distDir = getFileCacheDir(localDir, user);
+ final Path distDir = getFileCacheDir(localDirPath, user);
try {
lfs.mkdir(distDir, fileperms, true);
distributedCacheDirStatus = true;
@@ -403,12 +402,12 @@ public class DefaultContainerExecutor ex
* </ul>
* @param localDirs
*/
- private void createAppDirs(List<Path> localDirs, String user, String appId)
+ private void createAppDirs(List<String> localDirs, String user, String appId)
throws IOException {
boolean initAppDirStatus = false;
FsPermission appperms = new FsPermission(APPDIR_PERM);
- for (Path localDir : localDirs) {
- Path fullAppDir = getApplicationDir(localDir, user, appId);
+ for (String localDir : localDirs) {
+ Path fullAppDir = getApplicationDir(new Path(localDir), user, appId);
// create $local.dir/usercache/$user/appcache/$appId
try {
lfs.mkdir(fullAppDir, appperms, true);
@@ -427,15 +426,12 @@ public class DefaultContainerExecutor ex
/**
* Create application log directories on all disks.
*/
- private void createAppLogDirs(String appId)
+ private void createAppLogDirs(String appId, List<String> logDirs)
throws IOException {
- String[] rootLogDirs =
- getConf()
- .getStrings(YarnConfiguration.NM_LOG_DIRS, YarnConfiguration.DEFAULT_NM_LOG_DIRS);
-
+
boolean appLogDirStatus = false;
FsPermission appLogDirPerms = new FsPermission(LOGDIR_PERM);
- for (String rootLogDir : rootLogDirs) {
+ for (String rootLogDir : logDirs) {
// create $log.dir/$appid
Path appLogDir = new Path(rootLogDir, appId);
try {
@@ -455,15 +451,12 @@ public class DefaultContainerExecutor ex
/**
* Create container log directories on all disks.
*/
- private void createContainerLogDirs(String appId, String containerId)
- throws IOException {
- String[] rootLogDirs =
- getConf()
- .getStrings(YarnConfiguration.NM_LOG_DIRS, YarnConfiguration.DEFAULT_NM_LOG_DIRS);
-
+ private void createContainerLogDirs(String appId, String containerId,
+ List<String> logDirs) throws IOException {
+
boolean containerLogDirStatus = false;
FsPermission containerLogDirPerms = new FsPermission(LOGDIR_PERM);
- for (String rootLogDir : rootLogDirs) {
+ for (String rootLogDir : logDirs) {
// create $log.dir/$appid/$containerid
Path appLogDir = new Path(rootLogDir, appId);
Path containerLogDir = new Path(appLogDir, containerId);
@@ -483,4 +476,15 @@ public class DefaultContainerExecutor ex
+ containerId);
}
}
+
+ /**
+ * @return the list of paths of given local directories
+ */
+ private static List<Path> getPaths(List<String> dirs) {
+ List<Path> paths = new ArrayList<Path>(dirs.size());
+ for (int i = 0; i < dirs.size(); i++) {
+ paths.add(new Path(dirs.get(i)));
+ }
+ return paths;
+ }
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java Tue Nov 29 23:28:16 2011
@@ -126,13 +126,18 @@ public class LinuxContainerExecutor exte
@Override
public void startLocalizer(Path nmPrivateContainerTokensPath,
InetSocketAddress nmAddr, String user, String appId, String locId,
- List<Path> localDirs) throws IOException, InterruptedException {
+ List<String> localDirs, List<String> logDirs)
+ throws IOException, InterruptedException {
+
List<String> command = new ArrayList<String>(
Arrays.asList(containerExecutorExe,
user,
Integer.toString(Commands.INITIALIZE_CONTAINER.getValue()),
appId,
- nmPrivateContainerTokensPath.toUri().getPath().toString()));
+ nmPrivateContainerTokensPath.toUri().getPath().toString(),
+ StringUtils.join(",", localDirs),
+ StringUtils.join(",", logDirs)));
+
File jvm = // use same jvm as parent
new File(new File(System.getProperty("java.home"), "bin"), "java");
command.add(jvm.toString());
@@ -148,8 +153,8 @@ public class LinuxContainerExecutor exte
command.add(locId);
command.add(nmAddr.getHostName());
command.add(Integer.toString(nmAddr.getPort()));
- for (Path p : localDirs) {
- command.add(p.toUri().getPath().toString());
+ for (String dir : localDirs) {
+ command.add(dir);
}
String[] commandArray = command.toArray(new String[command.size()]);
ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray);
@@ -174,7 +179,8 @@ public class LinuxContainerExecutor exte
@Override
public int launchContainer(Container container,
Path nmPrivateCotainerScriptPath, Path nmPrivateTokensPath,
- String user, String appId, Path containerWorkDir) throws IOException {
+ String user, String appId, Path containerWorkDir,
+ List<String> localDirs, List<String> logDirs) throws IOException {
ContainerId containerId = container.getContainerID();
String containerIdStr = ConverterUtils.toString(containerId);
@@ -189,8 +195,10 @@ public class LinuxContainerExecutor exte
.toString(Commands.LAUNCH_CONTAINER.getValue()), appId,
containerIdStr, containerWorkDir.toString(),
nmPrivateCotainerScriptPath.toUri().getPath().toString(),
- nmPrivateTokensPath.toUri().getPath().toString(), pidFilePath
- .toString()));
+ nmPrivateTokensPath.toUri().getPath().toString(),
+ pidFilePath.toString(),
+ StringUtils.join(",", localDirs),
+ StringUtils.join(",", logDirs)));
String[] commandArray = command.toArray(new String[command.size()]);
shExec = new ShellCommandExecutor(commandArray, null, // NM's cwd
container.getLaunchContext().getEnvironment()); // sanitized env
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java Tue Nov 29 23:28:16 2011
@@ -25,7 +25,6 @@ import java.util.concurrent.ConcurrentSk
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.NodeHealthCheckerService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.security.SecurityUtil;
@@ -59,6 +58,8 @@ public class NodeManager extends Composi
protected final NodeManagerMetrics metrics = NodeManagerMetrics.create();
protected ContainerTokenSecretManager containerTokenSecretManager;
private ApplicationACLsManager aclsManager;
+ private NodeHealthCheckerService nodeHealthChecker;
+ private LocalDirsHandlerService dirsHandler;
public NodeManager() {
super(NodeManager.class.getName());
@@ -78,14 +79,16 @@ public class NodeManager extends Composi
protected ContainerManagerImpl createContainerManager(Context context,
ContainerExecutor exec, DeletionService del,
NodeStatusUpdater nodeStatusUpdater, ContainerTokenSecretManager
- containerTokenSecretManager, ApplicationACLsManager aclsManager) {
+ containerTokenSecretManager, ApplicationACLsManager aclsManager,
+ LocalDirsHandlerService dirsHandler) {
return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater,
- metrics, containerTokenSecretManager, aclsManager);
+ metrics, containerTokenSecretManager, aclsManager, dirsHandler);
}
protected WebServer createWebServer(Context nmContext,
- ResourceView resourceView, ApplicationACLsManager aclsManager) {
- return new WebServer(nmContext, resourceView, aclsManager);
+ ResourceView resourceView, ApplicationACLsManager aclsManager,
+ LocalDirsHandlerService dirsHandler) {
+ return new WebServer(nmContext, resourceView, aclsManager, dirsHandler);
}
protected void doSecureLogin() throws IOException {
@@ -121,16 +124,12 @@ public class NodeManager extends Composi
// NodeManager level dispatcher
AsyncDispatcher dispatcher = new AsyncDispatcher();
- NodeHealthCheckerService healthChecker = null;
- if (NodeHealthCheckerService.shouldRun(conf)) {
- healthChecker = new NodeHealthCheckerService();
- addService(healthChecker);
- }
+ nodeHealthChecker = new NodeHealthCheckerService();
+ addService(nodeHealthChecker);
+ dirsHandler = nodeHealthChecker.getDiskHandler();
- NodeStatusUpdater nodeStatusUpdater =
- createNodeStatusUpdater(context, dispatcher, healthChecker,
- this.containerTokenSecretManager);
-
+ NodeStatusUpdater nodeStatusUpdater = createNodeStatusUpdater(context,
+ dispatcher, nodeHealthChecker, this.containerTokenSecretManager);
nodeStatusUpdater.register(this);
NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor();
@@ -138,11 +137,11 @@ public class NodeManager extends Composi
ContainerManagerImpl containerManager =
createContainerManager(context, exec, del, nodeStatusUpdater,
- this.containerTokenSecretManager, this.aclsManager);
+ this.containerTokenSecretManager, this.aclsManager, dirsHandler);
addService(containerManager);
Service webServer = createWebServer(context, containerManager
- .getContainersMonitor(), this.aclsManager);
+ .getContainersMonitor(), this.aclsManager, dirsHandler);
addService(webServer);
dispatcher.register(ContainerManagerEventType.class, containerManager);
@@ -215,7 +214,14 @@ public class NodeManager extends Composi
}
}
-
+
+ /**
+ * @return the node health checker
+ */
+ public NodeHealthCheckerService getNodeHealthChecker() {
+ return nodeHealthChecker;
+ }
+
@Override
public void stateChanged(Service service) {
// Shutdown the Nodemanager when the NodeStatusUpdater is stopped.
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java Tue Nov 29 23:28:16 2011
@@ -27,7 +27,6 @@ import java.util.Map.Entry;
import org.apache.avro.AvroRuntimeException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.NodeHealthCheckerService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation;
@@ -222,11 +221,14 @@ public class NodeStatusUpdaterImpl exten
+ numActiveContainers + " containers");
NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus();
- if (this.healthChecker != null) {
- this.healthChecker.setHealthStatus(nodeHealthStatus);
+ nodeHealthStatus.setHealthReport(healthChecker.getHealthReport());
+ nodeHealthStatus.setIsNodeHealthy(healthChecker.isHealthy());
+ nodeHealthStatus.setLastHealthReportTime(
+ healthChecker.getLastHealthReportTime());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Node's health-status : " + nodeHealthStatus.getIsNodeHealthy()
+ + ", " + nodeHealthStatus.getHealthReport());
}
- LOG.debug("Node's health-status : " + nodeHealthStatus.getIsNodeHealthy()
- + ", " + nodeHealthStatus.getHealthReport());
nodeStatus.setNodeHealthStatus(nodeHealthStatus);
return nodeStatus;
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java Tue Nov 29 23:28:16 2011
@@ -68,6 +68,7 @@ import org.apache.hadoop.yarn.server.nod
import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEvent;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
@@ -120,7 +121,8 @@ public class ContainerManagerImpl extend
private ContainerTokenSecretManager containerTokenSecretManager;
private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
-
+
+ protected LocalDirsHandlerService dirsHandler;
protected final AsyncDispatcher dispatcher;
private final ApplicationACLsManager aclsManager;
@@ -129,9 +131,12 @@ public class ContainerManagerImpl extend
public ContainerManagerImpl(Context context, ContainerExecutor exec,
DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
NodeManagerMetrics metrics, ContainerTokenSecretManager
- containerTokenSecretManager, ApplicationACLsManager aclsManager) {
+ containerTokenSecretManager, ApplicationACLsManager aclsManager,
+ LocalDirsHandlerService dirsHandler) {
super(ContainerManagerImpl.class.getName());
this.context = context;
+ this.dirsHandler = dirsHandler;
+
dispatcher = new AsyncDispatcher();
this.deletionService = deletionContext;
this.metrics = metrics;
@@ -190,9 +195,10 @@ public class ContainerManagerImpl extend
if (conf.getBoolean(YarnConfiguration.NM_LOG_AGGREGATION_ENABLED,
YarnConfiguration.DEFAULT_NM_LOG_AGGREGATION_ENABLED)) {
return new LogAggregationService(this.dispatcher, context,
- deletionService);
+ deletionService, dirsHandler);
} else {
- return new NonAggregatingLogHandler(this.dispatcher, deletionService);
+ return new NonAggregatingLogHandler(this.dispatcher, deletionService,
+ dirsHandler);
}
}
@@ -203,12 +209,12 @@ public class ContainerManagerImpl extend
protected ResourceLocalizationService createResourceLocalizationService(
ContainerExecutor exec, DeletionService deletionContext) {
return new ResourceLocalizationService(this.dispatcher, exec,
- deletionContext);
+ deletionContext, dirsHandler);
}
protected ContainersLauncher createContainersLauncher(Context context,
ContainerExecutor exec) {
- return new ContainersLauncher(context, this.dispatcher, exec);
+ return new ContainersLauncher(context, this.dispatcher, exec, dirsHandler);
}
@Override
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerExitEvent.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerExitEvent.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerExitEvent.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerExitEvent.java Tue Nov 29 23:28:16 2011
@@ -22,14 +22,20 @@ import org.apache.hadoop.yarn.api.record
public class ContainerExitEvent extends ContainerEvent {
private int exitCode;
+ private final String diagnosticInfo;
public ContainerExitEvent(ContainerId cID, ContainerEventType eventType,
- int exitCode) {
+ int exitCode, String diagnosticInfo) {
super(cID, eventType);
this.exitCode = exitCode;
+ this.diagnosticInfo = diagnosticInfo;
}
public int getExitCode() {
return this.exitCode;
}
+
+ public String getDiagnosticInfo() {
+ return diagnosticInfo;
+ }
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java Tue Nov 29 23:28:16 2011
@@ -50,6 +50,7 @@ import org.apache.hadoop.yarn.api.record
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.DelayedProcessKiller;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
@@ -78,7 +79,6 @@ public class ContainerLaunch implements
private final Application app;
private final Container container;
private final Configuration conf;
- private final LocalDirAllocator logDirsSelector;
private volatile AtomicBoolean shouldLaunchContainer = new AtomicBoolean(false);
private volatile AtomicBoolean completed = new AtomicBoolean(false);
@@ -88,14 +88,17 @@ public class ContainerLaunch implements
private Path pidFilePath = null;
+ private final LocalDirsHandlerService dirsHandler;
+
public ContainerLaunch(Configuration configuration, Dispatcher dispatcher,
- ContainerExecutor exec, Application app, Container container) {
+ ContainerExecutor exec, Application app, Container container,
+ LocalDirsHandlerService dirsHandler) {
this.conf = configuration;
this.app = app;
this.exec = exec;
this.container = container;
this.dispatcher = dispatcher;
- this.logDirsSelector = new LocalDirAllocator(YarnConfiguration.NM_LOG_DIRS);
+ this.dirsHandler = dirsHandler;
this.sleepDelayBeforeSigKill =
conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS,
YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS);
@@ -121,9 +124,8 @@ public class ContainerLaunch implements
List<String> newCmds = new ArrayList<String>(command.size());
String appIdStr = app.getAppId().toString();
Path containerLogDir =
- this.logDirsSelector.getLocalPathForWrite(ContainerLaunch
- .getRelativeContainerLogDir(appIdStr, containerIdStr),
- LocalDirAllocator.SIZE_UNKNOWN, this.conf, false);
+ dirsHandler.getLogPathForWrite(ContainerLaunch
+ .getRelativeContainerLogDir(appIdStr, containerIdStr), false);
for (String str : command) {
// TODO: Should we instead work via symlinks without this grammar?
newCmds.add(str.replace(ApplicationConstants.LOG_DIR_EXPANSION_VAR,
@@ -144,47 +146,49 @@ public class ContainerLaunch implements
// /////////////////////////// End of variable expansion
FileContext lfs = FileContext.getLocalFSFileContext();
- LocalDirAllocator lDirAllocator =
- new LocalDirAllocator(YarnConfiguration.NM_LOCAL_DIRS); // TODO
Path nmPrivateContainerScriptPath =
- lDirAllocator.getLocalPathForWrite(
+ dirsHandler.getLocalPathForWrite(
getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
- + CONTAINER_SCRIPT, this.conf);
+ + CONTAINER_SCRIPT);
Path nmPrivateTokensPath =
- lDirAllocator.getLocalPathForWrite(
+ dirsHandler.getLocalPathForWrite(
getContainerPrivateDir(appIdStr, containerIdStr)
+ Path.SEPARATOR
+ String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT,
- containerIdStr), this.conf);
+ containerIdStr));
DataOutputStream containerScriptOutStream = null;
DataOutputStream tokensOutStream = null;
// Select the working directory for the container
Path containerWorkDir =
- lDirAllocator.getLocalPathForWrite(ContainerLocalizer.USERCACHE
+ dirsHandler.getLocalPathForWrite(ContainerLocalizer.USERCACHE
+ Path.SEPARATOR + user + Path.SEPARATOR
+ ContainerLocalizer.APPCACHE + Path.SEPARATOR + appIdStr
+ Path.SEPARATOR + containerIdStr,
- LocalDirAllocator.SIZE_UNKNOWN, this.conf, false);
+ LocalDirAllocator.SIZE_UNKNOWN, false);
String pidFileSuffix = String.format(ContainerLaunch.PID_FILE_NAME_FMT,
containerIdStr);
// pid file should be in nm private dir so that it is not
// accessible by users
- pidFilePath = lDirAllocator.getLocalPathForWrite(
+ pidFilePath = dirsHandler.getLocalPathForWrite(
ResourceLocalizationService.NM_PRIVATE_DIR + Path.SEPARATOR
- + pidFileSuffix,
- this.conf);
+ + pidFileSuffix);
+ List<String> localDirs = dirsHandler.getLocalDirs();
+ List<String> logDirs = dirsHandler.getLogDirs();
+
+ if (!dirsHandler.areDisksHealthy()) {
+ ret = ExitCode.DISKS_FAILED.getExitCode();
+ throw new IOException("Most of the disks failed. "
+ + dirsHandler.getDisksHealthReport());
+ }
try {
// /////////// Write out the container-script in the nmPrivate space.
- String[] localDirs =
- this.conf.getStrings(YarnConfiguration.NM_LOCAL_DIRS,
- YarnConfiguration.DEFAULT_NM_LOCAL_DIRS);
- List<Path> appDirs = new ArrayList<Path>(localDirs.length);
+ List<Path> appDirs = new ArrayList<Path>(localDirs.size());
for (String localDir : localDirs) {
Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
Path userdir = new Path(usersdir, user);
@@ -234,30 +238,34 @@ public class ContainerLaunch implements
}
else {
exec.activateContainer(containerID, pidFilePath);
- ret =
- exec.launchContainer(container, nmPrivateContainerScriptPath,
- nmPrivateTokensPath, user, appIdStr, containerWorkDir);
+ ret = exec.launchContainer(container, nmPrivateContainerScriptPath,
+ nmPrivateTokensPath, user, appIdStr, containerWorkDir,
+ localDirs, logDirs);
}
} catch (Throwable e) {
- LOG.warn("Failed to launch container", e);
+ LOG.warn("Failed to launch container.", e);
dispatcher.getEventHandler().handle(new ContainerExitEvent(
launchContext.getContainerId(),
- ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret));
+ ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
+ e.getMessage()));
return ret;
} finally {
completed.set(true);
exec.deactivateContainer(containerID);
}
- LOG.debug("Container " + containerIdStr + " completed with exit code "
- + ret);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Container " + containerIdStr + " completed with exit code "
+ + ret);
+ }
if (ret == ExitCode.FORCE_KILLED.getExitCode()
|| ret == ExitCode.TERMINATED.getExitCode()) {
// If the process was killed, Send container_cleanedup_after_kill and
// just break out of this method.
dispatcher.getEventHandler().handle(
new ContainerExitEvent(launchContext.getContainerId(),
- ContainerEventType.CONTAINER_KILLED_ON_REQUEST, ret));
+ ContainerEventType.CONTAINER_KILLED_ON_REQUEST, ret,
+ "Container exited with a non-zero exit code " + ret));
return ret;
}
@@ -265,7 +273,8 @@ public class ContainerLaunch implements
LOG.warn("Container exited with a non-zero exit code " + ret);
this.dispatcher.getEventHandler().handle(new ContainerExitEvent(
launchContext.getContainerId(),
- ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret));
+ ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
+ "Container exited with a non-zero exit code " + ret));
return ret;
}
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java Tue Nov 29 23:28:16 2011
@@ -33,10 +33,10 @@ import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.yarn.YarnException;
import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
@@ -59,6 +59,8 @@ public class ContainersLauncher extends
private final Context context;
private final ContainerExecutor exec;
private final Dispatcher dispatcher;
+
+ private LocalDirsHandlerService dirsHandler;
private final ExecutorService containerLauncher =
Executors.newCachedThreadPool(
new ThreadFactoryBuilder()
@@ -80,11 +82,12 @@ public class ContainersLauncher extends
public ContainersLauncher(Context context, Dispatcher dispatcher,
- ContainerExecutor exec) {
+ ContainerExecutor exec, LocalDirsHandlerService dirsHandler) {
super("containers-launcher");
this.exec = exec;
this.context = context;
this.dispatcher = dispatcher;
+ this.dirsHandler = dirsHandler;
}
@Override
@@ -114,15 +117,19 @@ public class ContainersLauncher extends
Application app =
context.getApplications().get(
containerId.getApplicationAttemptId().getApplicationId());
- ContainerLaunch launch =
- new ContainerLaunch(getConfig(), dispatcher, exec, app,
- event.getContainer());
+
+ ContainerLaunch launch = new ContainerLaunch(getConfig(), dispatcher,
+ exec, app, event.getContainer(), dirsHandler);
running.put(containerId,
new RunningContainer(containerLauncher.submit(launch),
launch));
break;
case CLEANUP_CONTAINER:
RunningContainer rContainerDatum = running.remove(containerId);
+ if (rContainerDatum == null) {
+ // Container not launched. So nothing needs to be done.
+ return;
+ }
Future<Integer> rContainer = rContainerDatum.runningcontainer;
if (rContainer != null
&& !rContainer.isDone()) {
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java?rev=1208135&r1=1208134&r2=1208135&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java Tue Nov 29 23:28:16 2011
@@ -45,12 +45,10 @@ import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.Credentials;
-import org.apache.hadoop.security.SecurityInfo;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.api.records.LocalResource;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
@@ -61,7 +59,6 @@ import org.apache.hadoop.yarn.server.nod
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse;
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus;
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.ResourceStatusType;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerSecurityInfo;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenIdentifier;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenSecretManager;
import org.apache.hadoop.yarn.util.ConverterUtils;
@@ -186,16 +183,30 @@ public class ContainerLocalizer {
}
Callable<Path> download(LocalDirAllocator lda, LocalResource rsrc,
- UserGroupInformation ugi) {
- return new FSDownload(lfs, ugi, conf, lda, rsrc, new Random());
+ UserGroupInformation ugi) throws IOException {
+ Path destPath = lda.getLocalPathForWrite(".", getEstimatedSize(rsrc), conf);
+ return new FSDownload(lfs, ugi, conf, destPath, rsrc, new Random());
+ }
+
+ static long getEstimatedSize(LocalResource rsrc) {
+ if (rsrc.getSize() < 0) {
+ return -1;
+ }
+ switch (rsrc.getType()) {
+ case ARCHIVE:
+ return 5 * rsrc.getSize();
+ case FILE:
+ default:
+ return rsrc.getSize();
+ }
}
void sleep(int duration) throws InterruptedException {
TimeUnit.SECONDS.sleep(duration);
}
- private void localizeFiles(LocalizationProtocol nodemanager, ExecutorService exec,
- UserGroupInformation ugi) {
+ private void localizeFiles(LocalizationProtocol nodemanager,
+ ExecutorService exec, UserGroupInformation ugi) throws IOException {
while (true) {
try {
LocalizerStatus status = createStatus();