You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by go...@apache.org on 2017/03/05 05:11:02 UTC
incubator-slider git commit: SLIDER-1209 Provide information on
whether a slider app was killed / stopped via a request (exitReason)
Repository: incubator-slider
Updated Branches:
refs/heads/develop bf1b41d70 -> 134ef53f9
SLIDER-1209 Provide information on whether a slider app was killed / stopped via a request (exitReason)
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/134ef53f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/134ef53f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/134ef53f
Branch: refs/heads/develop
Commit: 134ef53f9a0330e0c007ebfc620f1ea40017e54e
Parents: bf1b41d
Author: Gour Saha <go...@apache.org>
Authored: Sat Mar 4 21:10:06 2017 -0800
Committer: Gour Saha <go...@apache.org>
Committed: Sat Mar 4 21:10:06 2017 -0800
----------------------------------------------------------------------
.../org/apache/slider/api/SliderExitReason.java | 27 +++++++++++++
.../api/types/ApplicationDiagnostics.java | 10 +++++
.../server/appmaster/SliderAppMaster.java | 40 +++++++++++++-------
.../appmaster/actions/ActionStopSlider.java | 10 +++++
.../server/appmaster/rpc/SliderIPCService.java | 2 +
.../application/actions/RestActionStop.java | 2 +
.../lifecycle/AgentClusterLifecycleIT.groovy | 9 +++++
7 files changed, 87 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/api/SliderExitReason.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/SliderExitReason.java b/slider-core/src/main/java/org/apache/slider/api/SliderExitReason.java
new file mode 100644
index 0000000..de698c9
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/api/SliderExitReason.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.slider.api;
+
+/**
+ * A high-level reason for an application exit. In most cases it is difficult
+ * to determine whether the Slider app failed due to an application error.
+ * This gap can be bridged a little better when we get to SLIDER-1208.
+ *
+ */
+public enum SliderExitReason {
+ STOP_COMMAND_ISSUED, SLIDER_AM_ERROR, SLIDER_AGENT_ERROR, CHAOS_MONKEY, YARN_ERROR, APP_ERROR;
+}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java b/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
index c28c11b..f609017 100644
--- a/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
+++ b/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
@@ -25,6 +25,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.slider.api.SliderExitReason;
import org.codehaus.jackson.JsonGenerationException;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.annotate.JsonIgnore;
@@ -46,6 +47,7 @@ public class ApplicationDiagnostics {
private Map<String, ContainerInformation> containersMap = new HashMap<>();
private FinalApplicationStatus finalStatus;
private String finalMessage;
+ private SliderExitReason exitReason;
private Set<ContainerInformation> containers = new HashSet<>();
private Set<String> recentFailedContainers = new HashSet<>();
@@ -98,6 +100,14 @@ public class ApplicationDiagnostics {
this.finalMessage = finalMessage;
}
+ public SliderExitReason getExitReason() {
+ return exitReason;
+ }
+
+ public void setExitReason(SliderExitReason exitReason) {
+ this.exitReason = exitReason;
+ }
+
@Override
public String toString() {
try {
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index f076e87..c33f7ac 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -90,6 +90,7 @@ import org.apache.slider.api.ClusterDescription;
import org.apache.slider.api.InternalKeys;
import org.apache.slider.api.ResourceKeys;
import org.apache.slider.api.RoleKeys;
+import org.apache.slider.api.SliderExitReason;
import org.apache.slider.api.StatusKeys;
import org.apache.slider.api.proto.SliderClusterAPI;
import org.apache.slider.api.types.ApplicationDiagnostics;
@@ -1017,7 +1018,9 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
log.error("Exception : {}", e, e);
// call the AM stop command as if it had been queued (but without
// going via the queue, which may not have started)
- onAMStop(new ActionStopSlider(e));
+ ActionStopSlider stopSlider = new ActionStopSlider(e);
+ stopSlider.setExitReason(SliderExitReason.SLIDER_AM_ERROR);
+ onAMStop(stopSlider);
}
//shutdown time
return finish();
@@ -1617,6 +1620,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
ApplicationDiagnostics appDiagnostics = getApplicationDiagnostics();
appDiagnostics.setFinalStatus(appStatus);
appDiagnostics.setFinalMessage(finalMessage);
+ appDiagnostics.setExitReason(stopAction.getExitReason());
String appMessage = appDiagnostics.toString();
try {
log.info("Unregistering AM status={} message={}", appStatus, appMessage);
@@ -1961,7 +1965,9 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
} catch (TriggerClusterTeardownException e) {
//App state has decided that it is time to exit
log.error("Cluster teardown triggered {}", e, e);
- queue(new ActionStopSlider(e));
+ ActionStopSlider stopSlider = new ActionStopSlider(e);
+ stopSlider.setExitReason(SliderExitReason.SLIDER_AM_ERROR);
+ queue(stopSlider);
}
}
@@ -2028,10 +2034,10 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
@Override //AMRMClientAsync
public void onShutdownRequest() {
LOG_YARN.info("Shutdown Request received");
- signalAMComplete(new ActionStopSlider("stop",
- EXIT_SUCCESS,
- FinalApplicationStatus.SUCCEEDED,
- "Shutdown requested from RM"));
+ ActionStopSlider stopSlider = new ActionStopSlider("stop", EXIT_SUCCESS,
+ FinalApplicationStatus.SUCCEEDED, "Shutdown requested from RM");
+ stopSlider.setExitReason(SliderExitReason.YARN_ERROR);
+ signalAMComplete(stopSlider);
}
/**
@@ -2069,9 +2075,11 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
if (e instanceof InvalidResourceRequestException) {
// stop the cluster
LOG_YARN.error("AMRMClientAsync.onError() received {}", e, e);
- signalAMComplete(new ActionStopSlider("stop", EXIT_EXCEPTION_THROWN,
- FinalApplicationStatus.FAILED,
- SliderUtils.extractFirstLine(e.getLocalizedMessage())));
+ ActionStopSlider stopSlider = new ActionStopSlider("stop",
+ EXIT_EXCEPTION_THROWN, FinalApplicationStatus.FAILED,
+ SliderUtils.extractFirstLine(e.getLocalizedMessage()));
+ stopSlider.setExitReason(SliderExitReason.APP_ERROR);
+ signalAMComplete(stopSlider);
} else if (e instanceof InvalidApplicationMasterRequestException) {
// halt the AM
LOG_YARN.error("AMRMClientAsync.onError() received {}", e, e);
@@ -2165,7 +2173,9 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
// cluster flex failure: log
log.error("Failed to flex cluster nodes: {}", e, e);
// then what? exit
- queue(new ActionStopSlider(e));
+ ActionStopSlider stopSlider = new ActionStopSlider(e);
+ stopSlider.setExitReason(SliderExitReason.SLIDER_AM_ERROR);
+ queue(stopSlider);
}
}
@@ -2217,6 +2227,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
mappedProcessExitCode,
FinalApplicationStatus.FAILED,
reason);
+ stop.setExitReason(SliderExitReason.YARN_ERROR);
//this wasn't expected: the process finished early
spawnedProcessExitedBeforeShutdownTriggered = true;
log.info(
@@ -2392,9 +2403,11 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
if (exception instanceof ExitCodeProvider) {
exitCode = ((ExitCodeProvider) exception).getExitCode();
}
- signalAMComplete(
- new ActionStopSlider("stop", exitCode, FinalApplicationStatus.FAILED,
- SliderUtils.extractFirstLine(exception.getLocalizedMessage())));
+ ActionStopSlider stopSlider = new ActionStopSlider("stop", exitCode,
+ FinalApplicationStatus.FAILED,
+ SliderUtils.extractFirstLine(exception.getLocalizedMessage()));
+ stopSlider.setExitReason(SliderExitReason.SLIDER_AM_ERROR);
+ signalAMComplete(stopSlider);
}
}
@@ -2451,6 +2464,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
LauncherExitCodes.EXIT_FALSE,
FinalApplicationStatus.FAILED,
E_TRIGGERED_LAUNCH_FAILURE);
+ stop.setExitReason(SliderExitReason.CHAOS_MONKEY);
queue(stop);
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
index 055cea5..6d9e466 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
@@ -19,6 +19,7 @@
package org.apache.slider.server.appmaster.actions;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.slider.api.SliderExitReason;
import org.apache.slider.core.exceptions.ExceptionConverter;
import org.apache.slider.core.exceptions.TriggerClusterTeardownException;
import org.apache.slider.core.main.ExitCodeProvider;
@@ -37,6 +38,7 @@ public class ActionStopSlider extends AsyncAction {
private FinalApplicationStatus finalApplicationStatus;
private String message;
private final Exception ex;
+ private SliderExitReason exitReason;
/**
* Simple constructor
@@ -159,4 +161,12 @@ public class ActionStopSlider extends AsyncAction {
public Exception getEx() {
return ex;
}
+
+ public SliderExitReason getExitReason() {
+ return exitReason;
+ }
+
+ public void setExitReason(SliderExitReason exitReason) {
+ this.exitReason = exitReason;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
index fda23aa..1496ed0 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.api.SliderClusterProtocol;
+import org.apache.slider.api.SliderExitReason;
import org.apache.slider.api.proto.Messages;
import org.apache.slider.api.types.ApplicationLivenessInformation;
import org.apache.slider.api.types.ComponentInformation;
@@ -184,6 +185,7 @@ public class SliderIPCService extends AbstractService
LauncherExitCodes.EXIT_SUCCESS,
FinalApplicationStatus.SUCCEEDED,
message);
+ stopSlider.setExitReason(SliderExitReason.STOP_COMMAND_ISSUED);
log.info("SliderAppMasterApi.stopCluster: {}", stopSlider);
schedule(stopSlider);
return Messages.StopClusterResponseProto.getDefaultInstance();
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java
index 544f589..02e2295 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java
@@ -19,6 +19,7 @@
package org.apache.slider.server.appmaster.web.rest.application.actions;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.slider.api.SliderExitReason;
import org.apache.slider.core.main.LauncherExitCodes;
import org.apache.slider.server.appmaster.actions.ActionStopSlider;
import org.apache.slider.server.appmaster.web.WebAppApi;
@@ -59,6 +60,7 @@ public class RestActionStop {
LauncherExitCodes.EXIT_SUCCESS,
FinalApplicationStatus.SUCCEEDED,
text);
+ stopSlider.setExitReason(SliderExitReason.STOP_COMMAND_ISSUED);
log.info("SliderAppMasterApi.stopCluster: {}", stopSlider);
slider.getQueues().schedule(stopSlider);
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
index faeb0a1..e8e2791 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
@@ -22,12 +22,15 @@ import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import org.apache.hadoop.yarn.api.records.YarnApplicationState
import org.apache.slider.api.ClusterDescription
+import org.apache.slider.api.SliderExitReason;
import org.apache.slider.api.StatusKeys
+import org.apache.slider.api.types.ApplicationDiagnostics;
import org.apache.slider.client.SliderClient
import org.apache.slider.common.SliderExitCodes
import org.apache.slider.common.SliderXmlConfKeys
import org.apache.slider.common.params.Arguments
import org.apache.slider.common.params.SliderActions
+import org.apache.slider.core.launch.SerializedApplicationReport
import org.apache.slider.funtest.ResourcePaths
import org.apache.slider.funtest.framework.AgentCommandTestBase
import org.apache.slider.funtest.framework.FuntestProperties
@@ -145,6 +148,12 @@ public class AgentClusterLifecycleIT extends AgentCommandTestBase
// should be in finished state, as this was a clean shutdown
assertInYarnState(appId, YarnApplicationState.FINISHED)
+ // Get diagnostics and validate that exitReason is STOP_COMMAND_ISSUED
+ SerializedApplicationReport appReport = lookupApplication(appId)
+ log.info("Application Report {}", appReport);
+ ApplicationDiagnostics appDiagnostics = ApplicationDiagnostics.fromJson(appReport.diagnostics)
+ assert appDiagnostics.exitReason == SliderExitReason.STOP_COMMAND_ISSUED
+
//cluster exists if you don't want it to be live
exists(0, CLUSTER, false)
//condition returns false if it is required to be live