You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@twill.apache.org by ch...@apache.org on 2014/04/22 08:06:26 UTC

[29/50] [abbrv] git commit: (TWILL-48) Make AM not to re-launch container that failed at initialize stage.

(TWILL-48) Make AM not to re-launch container that failed at initialize stage.

Signed-off-by: Terence Yim <te...@continuuity.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-twill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-twill/commit/a77b67cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-twill/tree/a77b67cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-twill/diff/a77b67cf

Branch: refs/heads/site
Commit: a77b67cf3e465b966487b3002cb1450590cbc9d3
Parents: 9171d22
Author: Terence Yim <te...@continuuity.com>
Authored: Fri Feb 14 13:44:48 2014 -0800
Committer: Terence Yim <te...@continuuity.com>
Committed: Mon Feb 17 13:41:20 2014 -0800

----------------------------------------------------------------------
 .../org/apache/twill/api/TwillRunnable.java     |  4 +-
 .../twill/internal/ContainerExitCodes.java      | 51 +++++++++++++++
 .../org/apache/twill/internal/ServiceMain.java  | 27 +++++---
 .../internal/appmaster/RunningContainers.java   | 16 ++++-
 .../twill/yarn/InitializeFailTestRun.java       | 68 ++++++++++++++++++++
 5 files changed, 153 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java
----------------------------------------------------------------------
diff --git a/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java b/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java
index 4350bfb..d88000a 100644
--- a/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java
+++ b/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java
@@ -30,7 +30,9 @@ public interface TwillRunnable extends Runnable {
   TwillRunnableSpecification configure();
 
   /**
-   * Called when the container process starts. Executed in container machine.
+   * Called when the container process starts. Executed in container machine. If any exception is thrown from this
+   * method, this runnable won't be retried.
+   *
    * @param context Contains information about the runtime context.
    */
   void initialize(TwillContext context);

http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-yarn/src/main/java/org/apache/twill/internal/ContainerExitCodes.java
----------------------------------------------------------------------
diff --git a/twill-yarn/src/main/java/org/apache/twill/internal/ContainerExitCodes.java b/twill-yarn/src/main/java/org/apache/twill/internal/ContainerExitCodes.java
new file mode 100644
index 0000000..22576a9
--- /dev/null
+++ b/twill-yarn/src/main/java/org/apache/twill/internal/ContainerExitCodes.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.twill.internal;
+
+/**
+ * Collection of known exit codes. Some of the codes are copied from ContainerExitStatus as that class is missing in
+ * older YARN versions.
+ */
+public final class ContainerExitCodes {
+
+  public static final int SUCCESS = 0;
+
+  /**
+   * Exit code used when the container exits because it fails to initialize.
+   */
+  public static final int INIT_FAILED = 10;
+
+  public static final int INVALID = -1000;
+
+  /**
+   * Containers killed by the framework, either due to being released by
+   * the application or being 'lost' due to node failures etc.
+   */
+  public static final int ABORTED = -100;
+
+  /**
+   * When threshold number of the nodemanager-local-directories or
+   * threshold number of the nodemanager-log-directories become bad.
+   */
+  public static final int DISKS_FAILED = -101;
+
+  /**
+   * Containers preempted by the YARN framework.
+   */
+  public static final int PREEMPTED = -102;
+}

http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java
----------------------------------------------------------------------
diff --git a/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java b/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java
index 4831158..740e0e3 100644
--- a/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java
+++ b/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java
@@ -73,16 +73,25 @@ public abstract class ServiceMain {
     // Listener for state changes of the service
     ListenableFuture<Service.State> completion = Services.getCompletionFuture(service);
 
-    // Starts the service
-    LOG.info("Starting service {}.", serviceName);
-    Futures.getUnchecked(Services.chainStart(zkClientService, service));
-    LOG.info("Service {} started.", serviceName);
     try {
-      completion.get();
-      LOG.info("Service {} completed.", serviceName);
-    } catch (Throwable t) {
-      LOG.warn("Exception thrown from service {}.", serviceName, t);
-      throw Throwables.propagate(t);
+      try {
+        // Starts the service
+        LOG.info("Starting service {}.", serviceName);
+        Futures.allAsList(Services.chainStart(zkClientService, service).get()).get();
+        LOG.info("Service {} started.", serviceName);
+      } catch (Throwable t) {
+        LOG.error("Exception when starting service {}.", serviceName, t);
+        // Exit with the init fail exit code.
+        System.exit(ContainerExitCodes.INIT_FAILED);
+      }
+
+      try {
+        completion.get();
+        LOG.info("Service {} completed.", serviceName);
+      } catch (Throwable t) {
+        LOG.error("Exception thrown from service {}.", serviceName, t);
+        throw Throwables.propagate(t);
+      }
     } finally {
       ILoggerFactory loggerFactory = LoggerFactory.getILoggerFactory();
       if (loggerFactory instanceof LoggerContext) {

http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java
----------------------------------------------------------------------
diff --git a/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java b/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java
index 57c58da..63e3db8 100644
--- a/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java
+++ b/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java
@@ -34,6 +34,7 @@ import org.apache.twill.api.ResourceReport;
 import org.apache.twill.api.RunId;
 import org.apache.twill.api.ServiceController;
 import org.apache.twill.api.TwillRunResources;
+import org.apache.twill.internal.ContainerExitCodes;
 import org.apache.twill.internal.ContainerInfo;
 import org.apache.twill.internal.DefaultResourceReport;
 import org.apache.twill.internal.DefaultTwillRunResources;
@@ -309,10 +310,13 @@ final class RunningContainers {
         LOG.warn("More than one controller found for container {}", containerId);
       }
 
-      if (exitStatus != 0) {
-        LOG.warn("Container {} exited abnormally with state {}, exit code {}. Re-request the container.",
+      if (exitStatus != ContainerExitCodes.SUCCESS) {
+        LOG.warn("Container {} exited abnormally with state {}, exit code {}.",
                  containerId, state, exitStatus);
-        restartRunnables.add(lookup.keySet().iterator().next());
+        if (shouldRetry(exitStatus)) {
+          LOG.info("Re-request the container {} for exit code {}.", containerId, exitStatus);
+          restartRunnables.add(lookup.keySet().iterator().next());
+        }
       } else {
         LOG.info("Container {} exited normally with state {}", containerId, state);
       }
@@ -333,6 +337,12 @@ final class RunningContainers {
     }
   }
 
+  private boolean shouldRetry(int exitCode) {
+    return exitCode != ContainerExitCodes.SUCCESS
+      && exitCode != ContainerExitCodes.DISKS_FAILED
+      && exitCode != ContainerExitCodes.INIT_FAILED;
+  }
+
   /**
    * Sends a command through the given {@link TwillContainerController} of a runnable. Decrements the count
    * when the sending of command completed. Triggers completion when count reaches zero.

http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-yarn/src/test/java/org/apache/twill/yarn/InitializeFailTestRun.java
----------------------------------------------------------------------
diff --git a/twill-yarn/src/test/java/org/apache/twill/yarn/InitializeFailTestRun.java b/twill-yarn/src/test/java/org/apache/twill/yarn/InitializeFailTestRun.java
new file mode 100644
index 0000000..39813cc
--- /dev/null
+++ b/twill-yarn/src/test/java/org/apache/twill/yarn/InitializeFailTestRun.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.twill.yarn;
+
+import org.apache.twill.api.AbstractTwillRunnable;
+import org.apache.twill.api.TwillContext;
+import org.apache.twill.api.TwillController;
+import org.apache.twill.api.TwillRunner;
+import org.apache.twill.api.logging.PrinterLogHandler;
+import org.apache.twill.common.Services;
+import org.junit.Test;
+
+import java.io.PrintWriter;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Test for no retry on runnable initialize failure.
+ */
+public class InitializeFailTestRun extends BaseYarnTest {
+
+  @Test
+  public void testInitFail() throws InterruptedException, ExecutionException, TimeoutException {
+    TwillRunner runner = YarnTestUtils.getTwillRunner();
+    TwillController controller = runner.prepare(new InitFailRunnable())
+                                       .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out)))
+                                       .start();
+
+    Services.getCompletionFuture(controller).get(2, TimeUnit.MINUTES);
+  }
+
+  /**
+   * TwillRunnable class that throws exception in initialize.
+   */
+  public static final class InitFailRunnable extends AbstractTwillRunnable {
+
+    @Override
+    public void initialize(TwillContext context) {
+      throw new IllegalStateException("Fail to init");
+    }
+
+    @Override
+    public void run() {
+      // No-op
+    }
+
+    @Override
+    public void stop() {
+      // No-op
+    }
+  }
+}