You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by st...@apache.org on 2014/10/06 04:57:16 UTC
[19/24] git commit: SLIDER-467 cleanly shut down AMs should say
"succeeded" in app reports. Easier said than done
SLIDER-467 cleanly shut down AMs should say "succeeded" in app reports. Easier said than done
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/155262bf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/155262bf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/155262bf
Branch: refs/heads/feature/SLIDER-149_Support_a_YARN_service_registry
Commit: 155262bf7555139cc54d553e6ef34e612d075a6f
Parents: 4a53f55
Author: Steve Loughran <st...@apache.org>
Authored: Sat Oct 4 14:02:48 2014 -0700
Committer: Steve Loughran <st...@apache.org>
Committed: Sat Oct 4 14:02:48 2014 -0700
----------------------------------------------------------------------
.../providers/AbstractProviderService.java | 21 ++++++++++-
.../providers/agent/AgentProviderService.java | 22 ------------
.../slideram/SliderAMProviderService.java | 8 -----
.../apache/slider/server/appmaster/AMUtils.java | 2 +-
.../server/appmaster/SliderAppMaster.java | 38 +++++++++-----------
.../appmaster/actions/ActionStopSlider.java | 20 +++++++++++
.../workflow/WorkflowSequenceService.java | 8 ++++-
.../standalone/TestStandaloneAMDestroy.groovy | 3 +-
.../standalone/TestStandaloneAMRestart.groovy | 36 +++++++++++++------
.../slider/test/YarnMiniClusterTestBase.groovy | 13 ++++---
.../providers/hbase/HBaseProviderService.java | 23 ------------
11 files changed, 99 insertions(+), 95 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
index baddb56..b3cf527 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
@@ -77,6 +77,7 @@ public abstract class AbstractProviderService
public AbstractProviderService(String name) {
super(name);
+ setStopIfNoChildServicesAtStartup(false);
}
@Override
@@ -184,7 +185,25 @@ public abstract class AbstractProviderService
}
return false;
}
-
+
+ /**
+ * Override point to allow a process to start executing in this container; this default implementation starts nothing and returns false.
+ * @param instanceDefinition cluster description
+ * @param confDir configuration directory
+ * @param env environment
+ * @param execInProgress the callback for the exec events
+ * @return false
+ * @throws IOException
+ * @throws SliderException
+ */
+ @Override
+ public boolean exec(AggregateConf instanceDefinition,
+ File confDir,
+ Map<String, String> env,
+ ProviderCompleted execInProgress) throws IOException, SliderException {
+ return false;
+ }
+
@SuppressWarnings("ThrowableResultOfMethodCallIgnored")
@Override // ExitCodeProvider
public int getExitCode() {
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
index d7943b2..67a268e 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/agent/AgentProviderService.java
@@ -395,28 +395,6 @@ public class AgentProviderService extends AbstractProviderService implements
}
}
- /**
- * Run this service
- *
- * @param instanceDefinition component description
- * @param confDir local dir with the config
- * @param env environment variables above those generated by
- * @param execInProgress callback for the event notification
- *
- * @throws IOException IO problems
- * @throws SliderException anything internal
- */
- @Override
- public boolean exec(AggregateConf instanceDefinition,
- File confDir,
- Map<String, String> env,
- ProviderCompleted execInProgress) throws
- IOException,
- SliderException {
-
- return false;
- }
-
@Override
public boolean isSupportedRole(String role) {
return true;
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
index 863ea7e..2b2d1c7 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/slideram/SliderAMProviderService.java
@@ -91,14 +91,6 @@ public class SliderAMProviderService extends AbstractProviderService implements
}
@Override
- public boolean exec(AggregateConf instanceDefinition,
- File confDir,
- Map<String, String> env,
- ProviderCompleted execInProgress) throws IOException, SliderException {
- return false;
- }
-
- @Override
public List<ProviderRole> getRoles() {
return new ArrayList<ProviderRole>(0);
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/main/java/org/apache/slider/server/appmaster/AMUtils.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/AMUtils.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/AMUtils.java
index 39f511a..32684c6 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/AMUtils.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/AMUtils.java
@@ -33,7 +33,7 @@ public class AMUtils {
return LauncherExitCodes.EXIT_SUCCESS;
//remap from a planned shutdown to a failure
case LauncherExitCodes.EXIT_CLIENT_INITIATED_SHUTDOWN:
- return SliderExitCodes.EXIT_PROCESS_FAILED;
+ return SliderExitCodes.EXIT_SUCCESS;
default:
return exitCode;
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index aac8106..7fbea86 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -20,6 +20,7 @@ package org.apache.slider.server.appmaster;
import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
import com.google.protobuf.BlockingService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -846,15 +847,14 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
// launch the real provider; this is expected to trigger a callback that
// starts the node review process
launchProviderService(instanceDefinition, confDir);
-
+
//now block waiting to be told to exit the process
waitForAMCompletionSignal();
- //shutdown time
- } finally {
- finish();
+ } catch(Exception e) {
+ stopAction = new ActionStopSlider(e);
}
-
- return amExitCode;
+ //shutdown time
+ return finish();
}
private void startAgentWebApp(MapOperations appInformation,
@@ -1064,14 +1064,6 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
} finally {
AMExecutionStateLock.unlock();
}
- //add a sleep here for about a second. Why? it
- //stops RPC calls breaking so dramatically when the cluster
- //is torn down mid-RPC
- try {
- Thread.sleep(TERMINATION_SIGNAL_PROPAGATION_DELAY);
- } catch (InterruptedException ignored) {
- //ignored
- }
}
/**
@@ -1083,6 +1075,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
// this is a queued action: schedule it through the queues
schedule(stopActionRequest);
}
+
/**
* Signal that the AM is complete
*
@@ -1105,8 +1098,10 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
/**
* trigger the YARN cluster termination process
+ * @return the exit code
*/
- private synchronized void finish() {
+ private synchronized int finish() {
+ Preconditions.checkNotNull(stopAction, "null stop action");
FinalApplicationStatus appStatus;
log.info("Triggering shutdown of the AM: {}", stopAction);
@@ -1145,6 +1140,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
} catch (YarnException e) {
log.info("Failed to unregister application: " + e, e);
}
+ return exitCode;
}
/**
@@ -1377,7 +1373,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
public void onShutdownRequest() {
LOG_YARN.info("Shutdown Request received");
signalAMComplete(new ActionStopSlider("stop",
- EXIT_CLIENT_INITIATED_SHUTDOWN,
+ EXIT_SUCCESS,
FinalApplicationStatus.SUCCEEDED,
"Shutdown requested from RM"));
}
@@ -1627,7 +1623,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
return Messages.AMSuicideResponseProto.getDefaultInstance();
}
- /* =================================================================== */
+/* =================================================================== */
/* END */
/* =================================================================== */
@@ -1664,7 +1660,6 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
}
}
-
/* =================================================================== */
/* EventCallback from the child or ourselves directly */
/* =================================================================== */
@@ -1680,9 +1675,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
//this may happen in a separate thread, so the ability to act is limited
log.error("Failed to flex cluster nodes: {}", e, e);
//declare a failure
- queue(new ActionStopSlider("stop",
- EXIT_DEPLOYMENT_FAILED, FinalApplicationStatus.FAILED,
- "Failed to create application:" + e.toString()));
+ queue(new ActionStopSlider(e));
}
}
@@ -1728,7 +1721,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
if (shouldTriggerFailure) {
String reason =
- "Spawned master exited with raw " + exitCode + " mapped to " +
+ "Spawned process failed with raw " + exitCode + " mapped to " +
mappedProcessExitCode;
ActionStopSlider stop = new ActionStopSlider("stop",
mappedProcessExitCode,
@@ -1916,6 +1909,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
InternalKeys.DEFAULT_CHAOS_MONKEY_ENABLED);
if (!enabled) {
log.info("Chaos monkey disabled");
+ return false;
}
long monkeyInterval = internals.getTimeRange(
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
index 39ff761..d2f23a2 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
@@ -20,6 +20,8 @@ package org.apache.slider.server.appmaster.actions;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.slider.core.exceptions.TriggerClusterTeardownException;
+import org.apache.slider.core.main.ExitCodeProvider;
+import org.apache.slider.core.main.LauncherExitCodes;
import org.apache.slider.server.appmaster.SliderAppMaster;
import org.apache.slider.server.appmaster.state.AppState;
@@ -91,6 +93,24 @@ public class ActionStopSlider extends AsyncAction {
ex.getMessage());
}
+ /**
+ * Build from an exception.
+ * <p>
+ * If the exception implements
+ * {@link ExitCodeProvider} then the exit code is extracted from it; otherwise {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} is used.
+ * @param ex exception.
+ */
+ public ActionStopSlider(Exception ex) {
+ super("stop");
+ if (ex instanceof ExitCodeProvider) {
+ setExitCode(((ExitCodeProvider)ex).getExitCode());
+ } else {
+ setExitCode(LauncherExitCodes.EXIT_EXCEPTION_THROWN);
+ }
+ setFinalApplicationStatus(FinalApplicationStatus.FAILED);
+ setMessage(ex.getMessage());
+ }
+
@Override
public void execute(SliderAppMaster appMaster,
QueueAccess queueService,
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowSequenceService.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowSequenceService.java b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowSequenceService.java
index ca07f99..e584e63 100644
--- a/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowSequenceService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/services/workflow/WorkflowSequenceService.java
@@ -79,6 +79,8 @@ public class WorkflowSequenceService extends AbstractService implements
null if one did not finish yet
*/
private volatile Service previousService;
+
+ private boolean stopIfNoChildServicesAtStartup = true;
/**
* Construct an instance
@@ -133,13 +135,17 @@ public class WorkflowSequenceService extends AbstractService implements
return previousService;
}
+ protected void setStopIfNoChildServicesAtStartup(boolean stopIfNoChildServicesAtStartup) {
+ this.stopIfNoChildServicesAtStartup = stopIfNoChildServicesAtStartup;
+ }
+
/**
* When started
* @throws Exception
*/
@Override
protected void serviceStart() throws Exception {
- if (!startNextService()) {
+ if (!startNextService() && stopIfNoChildServicesAtStartup) {
//nothing to start -so stop
stop();
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
index fa48b70..463c4c0 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMDestroy.groovy
@@ -94,7 +94,6 @@ class TestStandaloneAMDestroy extends AgentMiniClusterTestBase {
describe "END EXPECTED WARNINGS"
-
describe "destroying $clustername"
//now: destroy it
@@ -153,6 +152,8 @@ class TestStandaloneAMDestroy extends AgentMiniClusterTestBase {
//and try to destroy a completely different cluster just for the fun of it
assert 0 == sliderClient.actionDestroy("no-cluster-of-this-name")
+
+ maybeStopCluster(cluster2, "", "Teardown at end of test case", false);
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy
index 1073309..947529c 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/standalone/TestStandaloneAMRestart.groovy
@@ -47,11 +47,13 @@ class TestStandaloneAMRestart extends AgentMiniClusterTestBase {
// patch the configuration for AM restart
YarnConfiguration conf = getRestartableConfiguration(5)
+ int restartLimit = 3;
String clustername = createMiniCluster("", conf, 1, true)
ServiceLauncher<SliderClient> launcher =
createStandaloneAMWithArgs(clustername,
[
- Arguments.ARG_OPTION, SliderXmlConfKeys.KEY_AM_RESTART_LIMIT, "4"
+ Arguments.ARG_OPTION, SliderXmlConfKeys.KEY_AM_RESTART_LIMIT,
+ "$restartLimit".toString()
],
true,
false)
@@ -68,21 +70,17 @@ class TestStandaloneAMRestart extends AgentMiniClusterTestBase {
diagnosticArgs.yarn = true
sliderClient.actionDiagnostic(diagnosticArgs)
- ActionAMSuicideArgs args = new ActionAMSuicideArgs()
- args.message = "test AM iteration"
- args.waittime = 100
- args.exitcode = 1
- sliderClient.actionAmSuicide(clustername, args)
- waitWhileClusterLive(sliderClient);
- //give yarn some time to notice
- sleep(20000)
- waitUntilClusterLive(sliderClient, 20000)
+ int iteration = 1;
+ killAM(iteration, sliderClient, clustername)
+ killAM(iteration++, sliderClient, clustername)
// app should be running here
assert 0 == sliderClient.actionExists(clustername, true)
+
// kill again & expect it to be considered a failure
- sliderClient.actionAmSuicide(clustername, args)
+ killAM(iteration++, sliderClient, clustername)
+
report = sliderClient.applicationReport
assert report.finalApplicationStatus == FinalApplicationStatus.FAILED
@@ -95,6 +93,22 @@ class TestStandaloneAMRestart extends AgentMiniClusterTestBase {
assert 0 == clusterActionFreeze(sliderClient, clustername, "force", true)
}
+ public ActionAMSuicideArgs killAM(
+ int iteration,
+ SliderClient sliderClient,
+ String clustername) {
+ ActionAMSuicideArgs args = new ActionAMSuicideArgs()
+ args.waittime = 100
+ args.exitcode = 1
+ args.message = "kill AM iteration #$iteration"
+ sliderClient.actionAmSuicide(clustername, args)
+ waitWhileClusterLive(sliderClient);
+ //give yarn some time to notice
+ sleep(20000)
+ waitUntilClusterLive(sliderClient, 20000)
+ return args
+ }
+
/**
* Get a restartable configuration
* @param restarts
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
index aa82bdb..9595a32 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/YarnMiniClusterTestBase.groovy
@@ -193,8 +193,10 @@ public abstract class YarnMiniClusterTestBase extends ServiceLauncherBaseTest {
clustersToTeardown << client;
}
protected void addToTeardown(ServiceLauncher<SliderClient> launcher) {
- SliderClient sliderClient = launcher.service
- if (sliderClient) addToTeardown(sliderClient)
+ SliderClient sliderClient = launcher?.service
+ if (sliderClient) {
+ addToTeardown(sliderClient)
+ }
}
@@ -208,7 +210,7 @@ public abstract class YarnMiniClusterTestBase extends ServiceLauncherBaseTest {
public void stopRunningClusters() {
clustersToTeardown.each { SliderClient client ->
try {
- maybeStopCluster(client, "", "Teardown at end of test case");
+ maybeStopCluster(client, "", "Teardown at end of test case", true);
} catch (Exception e) {
log.warn("While stopping cluster " + e, e);
}
@@ -748,14 +750,15 @@ public abstract class YarnMiniClusterTestBase extends ServiceLauncherBaseTest {
public int maybeStopCluster(
SliderClient sliderClient,
String clustername,
- String message) {
+ String message,
+ boolean force = false) {
if (sliderClient != null) {
if (!clustername) {
clustername = sliderClient.deployedClusterName;
}
//only stop a cluster that exists
if (clustername) {
- return clusterActionFreeze(sliderClient, clustername, message);
+ return clusterActionFreeze(sliderClient, clustername, message, force);
}
}
return 0;
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/155262bf/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
----------------------------------------------------------------------
diff --git a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
index dc11050..f75a6c7 100644
--- a/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
+++ b/slider-providers/hbase/slider-hbase-provider/src/main/java/org/apache/slider/providers/hbase/HBaseProviderService.java
@@ -281,29 +281,6 @@ public class HBaseProviderService extends AbstractProviderService
}
/**
- * Run this service
- *
- *
- * @param instanceDefinition component description
- * @param confDir local dir with the config
- * @param env environment variables above those generated by
- * @param execInProgress callback for the event notification
- * @throws IOException IO problems
- * @throws SliderException anything internal
- */
- @Override
- public boolean exec(AggregateConf instanceDefinition,
- File confDir,
- Map<String, String> env,
- ProviderCompleted execInProgress) throws
- IOException,
- SliderException {
-
- return false;
- }
-
-
- /**
* This is a validation of the application configuration on the AM.
* Here is where things like the existence of keytabs and other
* not-seen-client-side properties can be tested, before