You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@twill.apache.org by ch...@apache.org on 2014/04/22 08:06:26 UTC
[29/50] [abbrv] git commit: (TWILL-48) Make AM not to re-launch
container that failed at initialize stage.
(TWILL-48) Make AM not to re-launch container that failed at initialize stage.
Signed-off-by: Terence Yim <te...@continuuity.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-twill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-twill/commit/a77b67cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-twill/tree/a77b67cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-twill/diff/a77b67cf
Branch: refs/heads/site
Commit: a77b67cf3e465b966487b3002cb1450590cbc9d3
Parents: 9171d22
Author: Terence Yim <te...@continuuity.com>
Authored: Fri Feb 14 13:44:48 2014 -0800
Committer: Terence Yim <te...@continuuity.com>
Committed: Mon Feb 17 13:41:20 2014 -0800
----------------------------------------------------------------------
.../org/apache/twill/api/TwillRunnable.java | 4 +-
.../twill/internal/ContainerExitCodes.java | 51 +++++++++++++++
.../org/apache/twill/internal/ServiceMain.java | 27 +++++---
.../internal/appmaster/RunningContainers.java | 16 ++++-
.../twill/yarn/InitializeFailTestRun.java | 68 ++++++++++++++++++++
5 files changed, 153 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java
----------------------------------------------------------------------
diff --git a/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java b/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java
index 4350bfb..d88000a 100644
--- a/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java
+++ b/twill-api/src/main/java/org/apache/twill/api/TwillRunnable.java
@@ -30,7 +30,9 @@ public interface TwillRunnable extends Runnable {
TwillRunnableSpecification configure();
/**
- * Called when the container process starts. Executed in container machine.
+ * Called when the container process starts. Executed in container machine. If any exception is thrown from this
+ * method, this runnable will not be retried.
+ *
* @param context Contains information about the runtime context.
*/
void initialize(TwillContext context);
http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-yarn/src/main/java/org/apache/twill/internal/ContainerExitCodes.java
----------------------------------------------------------------------
diff --git a/twill-yarn/src/main/java/org/apache/twill/internal/ContainerExitCodes.java b/twill-yarn/src/main/java/org/apache/twill/internal/ContainerExitCodes.java
new file mode 100644
index 0000000..22576a9
--- /dev/null
+++ b/twill-yarn/src/main/java/org/apache/twill/internal/ContainerExitCodes.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.twill.internal;
+
+/**
+ * Collection of known exit codes. Some of the codes are copied from ContainerExitStatus as that class is missing in
+ * older YARN versions. NOTE(review): constants-only holder, yet no private constructor is declared here.
+ */
+public final class ContainerExitCodes {
+
+ public static final int SUCCESS = 0; // the only code RunningContainers treats as a normal exit
+
+ /**
+ * Exit code used when the container fails to initialize.
+ */
+ public static final int INIT_FAILED = 10;
+
+ public static final int INVALID = -1000; // presumably mirrors YARN's ContainerExitStatus.INVALID -- TODO confirm
+
+ /**
+ * Containers killed by the framework, either due to being released by
+ * the application or being 'lost' due to node failures etc.
+ */
+ public static final int ABORTED = -100;
+
+ /**
+ * When threshold number of the nodemanager-local-directories or
+ * threshold number of the nodemanager-log-directories become bad.
+ */
+ public static final int DISKS_FAILED = -101;
+
+ /**
+ * Containers preempted by the YARN framework.
+ */
+ public static final int PREEMPTED = -102;
+}
http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java
----------------------------------------------------------------------
diff --git a/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java b/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java
index 4831158..740e0e3 100644
--- a/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java
+++ b/twill-yarn/src/main/java/org/apache/twill/internal/ServiceMain.java
@@ -73,16 +73,25 @@ public abstract class ServiceMain {
// Listener for state changes of the service
ListenableFuture<Service.State> completion = Services.getCompletionFuture(service);
- // Starts the service
- LOG.info("Starting service {}.", serviceName);
- Futures.getUnchecked(Services.chainStart(zkClientService, service));
- LOG.info("Service {} started.", serviceName);
try {
- completion.get();
- LOG.info("Service {} completed.", serviceName);
- } catch (Throwable t) {
- LOG.warn("Exception thrown from service {}.", serviceName, t);
- throw Throwables.propagate(t);
+ try {
+ // Starts the service
+ LOG.info("Starting service {}.", serviceName);
+ Futures.allAsList(Services.chainStart(zkClientService, service).get()).get();
+ LOG.info("Service {} started.", serviceName);
+ } catch (Throwable t) {
+ LOG.error("Exception when starting service {}.", serviceName, t);
+ // Exit with the init fail exit code.
+ System.exit(ContainerExitCodes.INIT_FAILED);
+ }
+
+ try {
+ completion.get();
+ LOG.info("Service {} completed.", serviceName);
+ } catch (Throwable t) {
+ LOG.error("Exception thrown from service {}.", serviceName, t);
+ throw Throwables.propagate(t);
+ }
} finally {
ILoggerFactory loggerFactory = LoggerFactory.getILoggerFactory();
if (loggerFactory instanceof LoggerContext) {
http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java
----------------------------------------------------------------------
diff --git a/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java b/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java
index 57c58da..63e3db8 100644
--- a/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java
+++ b/twill-yarn/src/main/java/org/apache/twill/internal/appmaster/RunningContainers.java
@@ -34,6 +34,7 @@ import org.apache.twill.api.ResourceReport;
import org.apache.twill.api.RunId;
import org.apache.twill.api.ServiceController;
import org.apache.twill.api.TwillRunResources;
+import org.apache.twill.internal.ContainerExitCodes;
import org.apache.twill.internal.ContainerInfo;
import org.apache.twill.internal.DefaultResourceReport;
import org.apache.twill.internal.DefaultTwillRunResources;
@@ -309,10 +310,13 @@ final class RunningContainers {
LOG.warn("More than one controller found for container {}", containerId);
}
- if (exitStatus != 0) {
- LOG.warn("Container {} exited abnormally with state {}, exit code {}. Re-request the container.",
+ if (exitStatus != ContainerExitCodes.SUCCESS) {
+ LOG.warn("Container {} exited abnormally with state {}, exit code {}.",
containerId, state, exitStatus);
- restartRunnables.add(lookup.keySet().iterator().next());
+ if (shouldRetry(exitStatus)) {
+ LOG.info("Re-request the container {} for exit code {}.", containerId, exitStatus);
+ restartRunnables.add(lookup.keySet().iterator().next());
+ }
} else {
LOG.info("Container {} exited normally with state {}", containerId, state);
}
@@ -333,6 +337,12 @@ final class RunningContainers {
}
}
+ private boolean shouldRetry(int exitCode) { // true when a container that exited with exitCode should be re-requested
+ return exitCode != ContainerExitCodes.SUCCESS // normal exit, nothing to re-request
+ && exitCode != ContainerExitCodes.DISKS_FAILED // node-manager local/log directories went bad
+ && exitCode != ContainerExitCodes.INIT_FAILED; // ServiceMain exits with this code when the service fails to start
+ }
+
/**
* Sends a command through the given {@link TwillContainerController} of a runnable. Decrements the count
* when the sending of command completed. Triggers completion when count reaches zero.
http://git-wip-us.apache.org/repos/asf/incubator-twill/blob/a77b67cf/twill-yarn/src/test/java/org/apache/twill/yarn/InitializeFailTestRun.java
----------------------------------------------------------------------
diff --git a/twill-yarn/src/test/java/org/apache/twill/yarn/InitializeFailTestRun.java b/twill-yarn/src/test/java/org/apache/twill/yarn/InitializeFailTestRun.java
new file mode 100644
index 0000000..39813cc
--- /dev/null
+++ b/twill-yarn/src/test/java/org/apache/twill/yarn/InitializeFailTestRun.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.twill.yarn;
+
+import org.apache.twill.api.AbstractTwillRunnable;
+import org.apache.twill.api.TwillContext;
+import org.apache.twill.api.TwillController;
+import org.apache.twill.api.TwillRunner;
+import org.apache.twill.api.logging.PrinterLogHandler;
+import org.apache.twill.common.Services;
+import org.junit.Test;
+
+import java.io.PrintWriter;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Test that a runnable which fails in initialize is not retried.
+ */
+public class InitializeFailTestRun extends BaseYarnTest {
+
+ @Test
+ public void testInitFail() throws InterruptedException, ExecutionException, TimeoutException {
+ TwillRunner runner = YarnTestUtils.getTwillRunner();
+ TwillController controller = runner.prepare(new InitFailRunnable())
+ .addLogHandler(new PrinterLogHandler(new PrintWriter(System.out)))
+ .start();
+
+ Services.getCompletionFuture(controller).get(2, TimeUnit.MINUTES); // times out if the application has not completed
+ }
+
+ /**
+ * TwillRunnable that always throws an IllegalStateException from initialize.
+ */
+ public static final class InitFailRunnable extends AbstractTwillRunnable {
+
+ @Override
+ public void initialize(TwillContext context) {
+ throw new IllegalStateException("Fail to init");
+ }
+
+ @Override
+ public void run() {
+ // No-op
+ }
+
+ @Override
+ public void stop() {
+ // No-op
+ }
+ }
+}