You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by zj...@apache.org on 2015/06/30 01:30:54 UTC
[38/50] [abbrv] hadoop git commit: YARN-3850. NM fails to read files
from full disks which can lead to container logs being lost and other issues.
Contributed by Varun Saxena
YARN-3850. NM fails to read files from full disks which can lead to container logs being lost and other issues. Contributed by Varun Saxena
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/faf4a118
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/faf4a118
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/faf4a118
Branch: refs/heads/YARN-2928
Commit: faf4a11884ce6dee8e276c1c1f4f5f7152e0ecf0
Parents: 04fdf61
Author: Jason Lowe <jl...@apache.org>
Authored: Fri Jun 26 15:47:07 2015 +0000
Committer: Zhijie Shen <zj...@apache.org>
Committed: Mon Jun 29 10:28:27 2015 -0700
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 ++
.../nodemanager/LocalDirsHandlerService.java | 24 +++++++++
.../launcher/RecoveredContainerLaunch.java | 3 +-
.../logaggregation/AppLogAggregatorImpl.java | 4 +-
.../nodemanager/webapp/ContainerLogsUtils.java | 2 +-
.../TestLogAggregationService.java | 54 +++++++++++++++-----
.../webapp/TestContainerLogsPage.java | 22 +++++++-
7 files changed, 93 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/faf4a118/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index f093a2c..6e13999 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -819,6 +819,9 @@ Release 2.7.1 - UNRELEASED
YARN-3832. Resource Localization fails on a cluster due to existing cache
directories (Brahma Reddy Battula via jlowe)
+ YARN-3850. NM fails to read files from full disks which can lead to
+ container logs being lost and other issues (Varun Saxena via jlowe)
+
Release 2.7.0 - 2015-04-20
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/faf4a118/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
index 57d4395..0a61035 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
@@ -238,6 +238,18 @@ public class LocalDirsHandlerService extends AbstractService {
}
/**
+ * Function to get the local dirs which should be considered for reading
+ * existing files on disk. Contains the good local dirs and the local dirs
+ * that have reached the disk space limit
+ *
+ * @return the local dirs which should be considered for reading
+ */
+ public List<String> getLocalDirsForRead() {
+ return DirectoryCollection.concat(localDirs.getGoodDirs(),
+ localDirs.getFullDirs());
+ }
+
+ /**
* Function to get the local dirs which should be considered when cleaning up
* resources. Contains the good local dirs and the local dirs that have reached
* the disk space limit
@@ -250,6 +262,18 @@ public class LocalDirsHandlerService extends AbstractService {
}
/**
+ * Function to get the log dirs which should be considered for reading
+ * existing files on disk. Contains the good log dirs and the log dirs that
+ * have reached the disk space limit
+ *
+ * @return the log dirs which should be considered for reading
+ */
+ public List<String> getLogDirsForRead() {
+ return DirectoryCollection.concat(logDirs.getGoodDirs(),
+ logDirs.getFullDirs());
+ }
+
+ /**
* Function to get the log dirs which should be considered when cleaning up
* resources. Contains the good log dirs and the log dirs that have reached
* the disk space limit
http://git-wip-us.apache.org/repos/asf/hadoop/blob/faf4a118/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
index fb10f22..d7b9ae2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
@@ -126,7 +126,8 @@ public class RecoveredContainerLaunch extends ContainerLaunch {
private File locatePidFile(String appIdStr, String containerIdStr) {
String pidSubpath= getPidFileSubpath(appIdStr, containerIdStr);
- for (String dir : getContext().getLocalDirsHandler().getLocalDirs()) {
+ for (String dir : getContext().getLocalDirsHandler().
+ getLocalDirsForRead()) {
File pidFile = new File(dir, pidSubpath);
if (pidFile.exists()) {
return pidFile;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/faf4a118/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
index 81be813..4b95a03 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
@@ -595,10 +595,10 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
boolean appFinished) {
LOG.info("Uploading logs for container " + containerId
+ ". Current good log dirs are "
- + StringUtils.join(",", dirsHandler.getLogDirs()));
+ + StringUtils.join(",", dirsHandler.getLogDirsForRead()));
final LogKey logKey = new LogKey(containerId);
final LogValue logValue =
- new LogValue(dirsHandler.getLogDirs(), containerId,
+ new LogValue(dirsHandler.getLogDirsForRead(), containerId,
userUgi.getShortUserName(), logAggregationContext,
this.uploadedFileMeta, appFinished);
try {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/faf4a118/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java
index c588a89..319f49b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java
@@ -74,7 +74,7 @@ public class ContainerLogsUtils {
static List<File> getContainerLogDirs(ContainerId containerId,
LocalDirsHandlerService dirsHandler) throws YarnException {
- List<String> logDirs = dirsHandler.getLogDirs();
+ List<String> logDirs = dirsHandler.getLogDirsForRead();
List<File> containerLogDirs = new ArrayList<File>(logDirs.size());
for (String logDir : logDirs) {
logDir = new File(logDir).toURI().getPath();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/faf4a118/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
index eb0d055..fd97cef 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
@@ -177,22 +177,11 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
dispatcher.close();
}
- @Test
- public void testLocalFileDeletionAfterUpload() throws Exception {
- this.delSrvc = new DeletionService(createContainerExecutor());
- delSrvc = spy(delSrvc);
- this.delSrvc.init(conf);
- this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
- this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
- this.remoteRootLogDir.getAbsolutePath());
-
- LogAggregationService logAggregationService = spy(
- new LogAggregationService(dispatcher, this.context, this.delSrvc,
- super.dirsHandler));
+ private void verifyLocalFileDeletion(
+ LogAggregationService logAggregationService) throws Exception {
logAggregationService.init(this.conf);
logAggregationService.start();
-
ApplicationId application1 = BuilderUtils.newApplicationId(1234, 1);
// AppLogDir should be created
@@ -252,10 +241,47 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED)
};
- checkEvents(appEventHandler, expectedEvents, true, "getType", "getApplicationID");
+ checkEvents(appEventHandler, expectedEvents, true, "getType",
+ "getApplicationID");
}
@Test
+ public void testLocalFileDeletionAfterUpload() throws Exception {
+ this.delSrvc = new DeletionService(createContainerExecutor());
+ delSrvc = spy(delSrvc);
+ this.delSrvc.init(conf);
+ this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
+ this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
+ this.remoteRootLogDir.getAbsolutePath());
+
+ LogAggregationService logAggregationService = spy(
+ new LogAggregationService(dispatcher, this.context, this.delSrvc,
+ super.dirsHandler));
+ verifyLocalFileDeletion(logAggregationService);
+ }
+
+ @Test
+ public void testLocalFileDeletionOnDiskFull() throws Exception {
+ this.delSrvc = new DeletionService(createContainerExecutor());
+ delSrvc = spy(delSrvc);
+ this.delSrvc.init(conf);
+ this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
+ this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
+ this.remoteRootLogDir.getAbsolutePath());
+ List<String> logDirs = super.dirsHandler.getLogDirs();
+ LocalDirsHandlerService dirsHandler = spy(super.dirsHandler);
+ // Simulate disk being full by returning no good log dirs but having a
+ // directory in full log dirs.
+ when(dirsHandler.getLogDirs()).thenReturn(new ArrayList<String>());
+ when(dirsHandler.getLogDirsForRead()).thenReturn(logDirs);
+ LogAggregationService logAggregationService = spy(
+ new LogAggregationService(dispatcher, this.context, this.delSrvc,
+ dirsHandler));
+ verifyLocalFileDeletion(logAggregationService);
+ }
+
+
+ @Test
public void testNoContainerOnNode() throws Exception {
this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
http://git-wip-us.apache.org/repos/asf/hadoop/blob/faf4a118/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
index b2788ca..4b1a20b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.webapp;
import static org.junit.Assume.assumeTrue;
import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.verify;
@@ -29,6 +30,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -122,6 +124,24 @@ public class TestContainerLogsPage {
Assert.assertNull(nmContext.getContainers().get(container1));
files = ContainerLogsUtils.getContainerLogDirs(container1, user, nmContext);
Assert.assertTrue(!(files.get(0).toString().contains("file:")));
+
+ // Create a new context to check if correct container log dirs are fetched
+ // on full disk.
+ LocalDirsHandlerService dirsHandlerForFullDisk = spy(dirsHandler);
+ // good log dirs are empty and nm log dir is in the full log dir list.
+ when(dirsHandlerForFullDisk.getLogDirs()).
+ thenReturn(new ArrayList<String>());
+ when(dirsHandlerForFullDisk.getLogDirsForRead()).
+ thenReturn(Arrays.asList(new String[] {absLogDir.getAbsolutePath()}));
+ nmContext = new NodeManager.NMContext(null, null, dirsHandlerForFullDisk,
+ new ApplicationACLsManager(conf), new NMNullStateStoreService());
+ nmContext.getApplications().put(appId, app);
+ container.setState(ContainerState.RUNNING);
+ nmContext.getContainers().put(container1, container);
+ List<File> dirs =
+ ContainerLogsUtils.getContainerLogDirs(container1, user, nmContext);
+ File containerLogDir = new File(absLogDir, appId + "/" + container1);
+ Assert.assertTrue(dirs.contains(containerLogDir));
}
@Test(timeout = 10000)
@@ -231,7 +251,7 @@ public class TestContainerLogsPage {
LocalDirsHandlerService localDirs = mock(LocalDirsHandlerService.class);
List<String> logDirs = new ArrayList<String>();
logDirs.add("F:/nmlogs");
- when(localDirs.getLogDirs()).thenReturn(logDirs);
+ when(localDirs.getLogDirsForRead()).thenReturn(logDirs);
ApplicationIdPBImpl appId = mock(ApplicationIdPBImpl.class);
when(appId.toString()).thenReturn("app_id_1");