You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ar...@apache.org on 2016/03/09 18:45:08 UTC

[08/34] hadoop git commit: YARN-4744. Too many signal to container failure in case of LCE. Contributed by Sidharta Seethana

YARN-4744. Too many signal to container failure in case of LCE. Contributed by Sidharta Seethana


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/059caf99
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/059caf99
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/059caf99

Branch: refs/heads/HDFS-1312
Commit: 059caf99891943d9587cac19b48e82efbed06b2d
Parents: fd1c09b
Author: Jason Lowe <jl...@apache.org>
Authored: Mon Mar 7 15:40:01 2016 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Mon Mar 7 15:40:01 2016 +0000

----------------------------------------------------------------------
 .../nodemanager/LinuxContainerExecutor.java     |  4 ++--
 .../linux/privileged/PrivilegedOperation.java   | 23 +++++++++++++++++---
 .../privileged/PrivilegedOperationExecutor.java | 21 ++++++++++--------
 .../linux/resources/CGroupsHandlerImpl.java     |  2 +-
 .../linux/resources/TrafficController.java      |  2 +-
 .../runtime/DefaultLinuxContainerRuntime.java   | 11 ++++++----
 .../runtime/DockerLinuxContainerRuntime.java    |  5 ++---
 .../TestPrivilegedOperationExecutor.java        |  4 ++--
 .../linux/resources/TestCGroupsHandlerImpl.java |  2 +-
 9 files changed, 48 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/059caf99/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 1367b13..5a48e09 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -181,7 +181,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
     // verify configuration/permissions and exit
     try {
       PrivilegedOperation checkSetupOp = new PrivilegedOperation(
-          PrivilegedOperation.OperationType.CHECK_SETUP, (String) null);
+          PrivilegedOperation.OperationType.CHECK_SETUP);
       PrivilegedOperationExecutor privilegedOperationExecutor =
           PrivilegedOperationExecutor.getInstance(conf);
 
@@ -242,7 +242,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
     verifyUsernamePattern(user);
     String runAsUser = getRunAsUser(user);
     PrivilegedOperation initializeContainerOp = new PrivilegedOperation(
-        PrivilegedOperation.OperationType.INITIALIZE_CONTAINER, (String) null);
+        PrivilegedOperation.OperationType.INITIALIZE_CONTAINER);
     List<String> prefixCommands = new ArrayList<>();
 
     addSchedPriorityCommand(prefixCommands);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/059caf99/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
index cbbf7a8..259dee8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java
@@ -68,10 +68,16 @@ public class PrivilegedOperation {
 
   private final OperationType opType;
   private final List<String> args;
+  private boolean failureLogging;
 
-  public PrivilegedOperation(OperationType opType, String arg) {
+  public PrivilegedOperation(OperationType opType) {
     this.opType = opType;
     this.args = new ArrayList<String>();
+    this.failureLogging = true;
+  }
+
+  public PrivilegedOperation(OperationType opType, String arg) {
+    this(opType);
 
     if (arg != null) {
       this.args.add(arg);
@@ -79,8 +85,7 @@ public class PrivilegedOperation {
   }
 
   public PrivilegedOperation(OperationType opType, List<String> args) {
-    this.opType = opType;
-    this.args = new ArrayList<String>();
+    this(opType);
 
     if (args != null) {
       this.args.addAll(args);
@@ -97,6 +102,18 @@ public class PrivilegedOperation {
     this.args.addAll(args);
   }
 
+  public void enableFailureLogging() {
+    this.failureLogging = true;
+  }
+
+  public void disableFailureLogging() {
+    this.failureLogging = false;
+  }
+
+  public boolean isFailureLoggingEnabled() {
+    return failureLogging;
+  }
+
   public OperationType getOperationType() {
     return opType;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/059caf99/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java
index 4b1bb9f..7370daa 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java
@@ -155,17 +155,20 @@ public class PrivilegedOperationExecutor {
         LOG.debug(exec.getOutput());
       }
     } catch (ExitCodeException e) {
-      StringBuilder logBuilder = new StringBuilder("Shell execution returned "
-          + "exit code: ")
-          .append(exec.getExitCode())
-          .append(". Privileged Execution Operation Output: ")
-          .append(System.lineSeparator()).append(exec.getOutput());
+      if (operation.isFailureLoggingEnabled()) {
 
-      logBuilder.append("Full command array for failed execution: ")
-          .append(System.lineSeparator());
-      logBuilder.append(Arrays.toString(fullCommandArray));
+        StringBuilder logBuilder = new StringBuilder("Shell execution returned "
+            + "exit code: ")
+            .append(exec.getExitCode())
+            .append(". Privileged Execution Operation Output: ")
+            .append(System.lineSeparator()).append(exec.getOutput());
 
-      LOG.warn(logBuilder.toString());
+        logBuilder.append("Full command array for failed execution: ")
+            .append(System.lineSeparator());
+        logBuilder.append(Arrays.toString(fullCommandArray));
+
+        LOG.warn(logBuilder.toString());
+      }
 
       //stderr from shell executor seems to be stuffed into the exception
       //'message' - so, we have to extract it and set it as the error out

http://git-wip-us.apache.org/repos/asf/hadoop/blob/059caf99/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java
index 1ee0f8a..36bd468 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java
@@ -247,7 +247,7 @@ class CGroupsHandlerImpl implements CGroupsHandler {
             .append(controller.getName()).append('=').append(controllerPath);
         PrivilegedOperation.OperationType opType = PrivilegedOperation
             .OperationType.MOUNT_CGROUPS;
-        PrivilegedOperation op = new PrivilegedOperation(opType, (String) null);
+        PrivilegedOperation op = new PrivilegedOperation(opType);
 
         op.appendArgs(hierarchy, cGroupKV.toString());
         LOG.info("Mounting controller " + controller.getName() + " at " +

http://git-wip-us.apache.org/repos/asf/hadoop/blob/059caf99/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java
index e33cea4..f1468fb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java
@@ -546,7 +546,7 @@ import java.util.regex.Pattern;
       case TC_MODIFY_STATE:
       case TC_READ_STATE:
       case TC_READ_STATS:
-        operation = new PrivilegedOperation(opType, (String) null);
+        operation = new PrivilegedOperation(opType);
         commands = new ArrayList<>();
         break;
       default:

http://git-wip-us.apache.org/repos/asf/hadoop/blob/059caf99/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java
index 633fa66..3862b92 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DefaultLinuxContainerRuntime.java
@@ -67,7 +67,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
       throws ContainerExecutionException {
     Container container = ctx.getContainer();
     PrivilegedOperation launchOp = new PrivilegedOperation(
-        PrivilegedOperation.OperationType.LAUNCH_CONTAINER, (String) null);
+        PrivilegedOperation.OperationType.LAUNCH_CONTAINER);
 
     //All of these arguments are expected to be available in the runtime context
     launchOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
@@ -116,7 +116,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
       throws ContainerExecutionException {
     Container container = ctx.getContainer();
     PrivilegedOperation signalOp = new PrivilegedOperation(
-        PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null);
+        PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
 
     signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
         ctx.getExecutionAttribute(USER),
@@ -125,6 +125,9 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
         ctx.getExecutionAttribute(PID),
         Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue()));
 
+    //Some failures here are acceptable. Let the calling executor decide.
+    signalOp.disableFailureLogging();
+
     try {
       PrivilegedOperationExecutor executor = PrivilegedOperationExecutor
           .getInstance(conf);
@@ -133,8 +136,8 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
           signalOp, null, container.getLaunchContext().getEnvironment(),
           false);
     } catch (PrivilegedOperationException e) {
-      LOG.warn("Signal container failed. Exception: ", e);
-
+      //Don't log the failure here. Some kinds of signaling failures are
+      // acceptable. Let the calling executor decide what to do.
       throw new ContainerExecutionException("Signal container failed", e
           .getExitCode(), e.getOutput(), e.getErrorOutput());
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/059caf99/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
index 2dee663..2b4fc79 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
@@ -280,8 +280,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
     String commandFile = dockerClient.writeCommandToTempFile(runCommand,
         containerIdStr);
     PrivilegedOperation launchOp = new PrivilegedOperation(
-        PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER, (String)
-        null);
+        PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER);
 
     launchOp.appendArgs(runAsUser, ctx.getExecutionAttribute(USER),
         Integer.toString(PrivilegedOperation
@@ -321,7 +320,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
       throws ContainerExecutionException {
     Container container = ctx.getContainer();
     PrivilegedOperation signalOp = new PrivilegedOperation(
-        PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null);
+        PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
 
     signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
         ctx.getExecutionAttribute(USER),

http://git-wip-us.apache.org/repos/asf/hadoop/blob/059caf99/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java
index 849dbab..7146412 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java
@@ -69,7 +69,7 @@ public class TestPrivilegedOperationExecutor {
     cGroupTasks2 = "net_cls/hadoop_yarn/container_01/tasks";
     cGroupTasks3 = "blkio/hadoop_yarn/container_01/tasks";
     opDisallowed = new PrivilegedOperation
-        (PrivilegedOperation.OperationType.DELETE_AS_USER, (String) null);
+        (PrivilegedOperation.OperationType.DELETE_AS_USER);
     opTasksNone = new PrivilegedOperation
         (PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
             PrivilegedOperation.CGROUP_ARG_PREFIX + cGroupTasksNone);
@@ -118,7 +118,7 @@ public class TestPrivilegedOperationExecutor {
     PrivilegedOperationExecutor exec = PrivilegedOperationExecutor
         .getInstance(confWithExecutorPath);
     PrivilegedOperation op = new PrivilegedOperation(PrivilegedOperation
-        .OperationType.TC_MODIFY_STATE, (String) null);
+        .OperationType.TC_MODIFY_STATE);
     String[] cmdArray = exec.getPrivilegedOperationExecutionCommand(null, op);
 
     //No arguments added - so the resulting array should consist of

http://git-wip-us.apache.org/repos/asf/hadoop/blob/059caf99/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java
index 50f8da6..76d56b4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java
@@ -89,7 +89,7 @@ public class TestCGroupsHandlerImpl {
       cGroupsHandler = new CGroupsHandlerImpl(conf,
           privilegedOperationExecutorMock);
       PrivilegedOperation expectedOp = new PrivilegedOperation(
-          PrivilegedOperation.OperationType.MOUNT_CGROUPS, (String) null);
+          PrivilegedOperation.OperationType.MOUNT_CGROUPS);
       //This is expected to be of the form :
       //net_cls=<mount_path>/net_cls
       StringBuffer controllerKV = new StringBuffer(controller.getName())