You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by go...@apache.org on 2017/03/05 05:11:02 UTC

incubator-slider git commit: SLIDER-1209 Provide information on whether a slider app was killed / stopped via a request (exitReason)

Repository: incubator-slider
Updated Branches:
  refs/heads/develop bf1b41d70 -> 134ef53f9


SLIDER-1209 Provide information on whether a slider app was killed / stopped via a request (exitReason)


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/134ef53f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/134ef53f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/134ef53f

Branch: refs/heads/develop
Commit: 134ef53f9a0330e0c007ebfc620f1ea40017e54e
Parents: bf1b41d
Author: Gour Saha <go...@apache.org>
Authored: Sat Mar 4 21:10:06 2017 -0800
Committer: Gour Saha <go...@apache.org>
Committed: Sat Mar 4 21:10:06 2017 -0800

----------------------------------------------------------------------
 .../org/apache/slider/api/SliderExitReason.java | 27 +++++++++++++
 .../api/types/ApplicationDiagnostics.java       | 10 +++++
 .../server/appmaster/SliderAppMaster.java       | 40 +++++++++++++-------
 .../appmaster/actions/ActionStopSlider.java     | 10 +++++
 .../server/appmaster/rpc/SliderIPCService.java  |  2 +
 .../application/actions/RestActionStop.java     |  2 +
 .../lifecycle/AgentClusterLifecycleIT.groovy    |  9 +++++
 7 files changed, 87 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/api/SliderExitReason.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/SliderExitReason.java b/slider-core/src/main/java/org/apache/slider/api/SliderExitReason.java
new file mode 100644
index 0000000..de698c9
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/api/SliderExitReason.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.slider.api;
+
+/**
+ * A high-level reason for an application exit. In most cases it is difficult
+ * to determine whether the Slider app failed due to an application error.
+ * This gap can be bridged a little better when we get to SLIDER-1208.
+ *
+ */
+public enum SliderExitReason {
+  STOP_COMMAND_ISSUED, SLIDER_AM_ERROR, SLIDER_AGENT_ERROR, CHAOS_MONKEY, YARN_ERROR, APP_ERROR;
+}

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java b/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
index c28c11b..f609017 100644
--- a/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
+++ b/slider-core/src/main/java/org/apache/slider/api/types/ApplicationDiagnostics.java
@@ -25,6 +25,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.slider.api.SliderExitReason;
 import org.codehaus.jackson.JsonGenerationException;
 import org.codehaus.jackson.JsonParseException;
 import org.codehaus.jackson.annotate.JsonIgnore;
@@ -46,6 +47,7 @@ public class ApplicationDiagnostics {
   private Map<String, ContainerInformation> containersMap = new HashMap<>();
   private FinalApplicationStatus finalStatus;
   private String finalMessage;
+  private SliderExitReason exitReason;
   private Set<ContainerInformation> containers = new HashSet<>();
   private Set<String> recentFailedContainers = new HashSet<>();
 
@@ -98,6 +100,14 @@ public class ApplicationDiagnostics {
     this.finalMessage = finalMessage;
   }
 
+  public SliderExitReason getExitReason() {
+    return exitReason;
+  }
+
+  public void setExitReason(SliderExitReason exitReason) {
+    this.exitReason = exitReason;
+  }
+
   @Override
   public String toString() {
     try {

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index f076e87..c33f7ac 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -90,6 +90,7 @@ import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.api.InternalKeys;
 import org.apache.slider.api.ResourceKeys;
 import org.apache.slider.api.RoleKeys;
+import org.apache.slider.api.SliderExitReason;
 import org.apache.slider.api.StatusKeys;
 import org.apache.slider.api.proto.SliderClusterAPI;
 import org.apache.slider.api.types.ApplicationDiagnostics;
@@ -1017,7 +1018,9 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
       log.error("Exception : {}", e, e);
       // call the AM stop command as if it had been queued (but without
       // going via the queue, which may not have started
-      onAMStop(new ActionStopSlider(e));
+      ActionStopSlider stopSlider = new ActionStopSlider(e);
+      stopSlider.setExitReason(SliderExitReason.SLIDER_AM_ERROR);
+      onAMStop(stopSlider);
     }
     //shutdown time
     return finish();
@@ -1617,6 +1620,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
     ApplicationDiagnostics appDiagnostics = getApplicationDiagnostics();
     appDiagnostics.setFinalStatus(appStatus);
     appDiagnostics.setFinalMessage(finalMessage);
+    appDiagnostics.setExitReason(stopAction.getExitReason());
     String appMessage = appDiagnostics.toString();
     try {
       log.info("Unregistering AM status={} message={}", appStatus, appMessage);
@@ -1961,7 +1965,9 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
     } catch (TriggerClusterTeardownException e) {
       //App state has decided that it is time to exit
       log.error("Cluster teardown triggered {}", e, e);
-      queue(new ActionStopSlider(e));
+      ActionStopSlider stopSlider = new ActionStopSlider(e);
+      stopSlider.setExitReason(SliderExitReason.SLIDER_AM_ERROR);
+      queue(stopSlider);
     }
   }
 
@@ -2028,10 +2034,10 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
   @Override //AMRMClientAsync
   public void onShutdownRequest() {
     LOG_YARN.info("Shutdown Request received");
-    signalAMComplete(new ActionStopSlider("stop",
-        EXIT_SUCCESS,
-        FinalApplicationStatus.SUCCEEDED,
-        "Shutdown requested from RM"));
+    ActionStopSlider stopSlider = new ActionStopSlider("stop", EXIT_SUCCESS,
+        FinalApplicationStatus.SUCCEEDED, "Shutdown requested from RM");
+    stopSlider.setExitReason(SliderExitReason.YARN_ERROR);
+    signalAMComplete(stopSlider);
   }
 
   /**
@@ -2069,9 +2075,11 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
     if (e instanceof InvalidResourceRequestException) {
       // stop the cluster
       LOG_YARN.error("AMRMClientAsync.onError() received {}", e, e);
-      signalAMComplete(new ActionStopSlider("stop", EXIT_EXCEPTION_THROWN,
-          FinalApplicationStatus.FAILED,
-          SliderUtils.extractFirstLine(e.getLocalizedMessage())));
+      ActionStopSlider stopSlider = new ActionStopSlider("stop",
+          EXIT_EXCEPTION_THROWN, FinalApplicationStatus.FAILED,
+          SliderUtils.extractFirstLine(e.getLocalizedMessage()));
+      stopSlider.setExitReason(SliderExitReason.APP_ERROR);
+      signalAMComplete(stopSlider);
     } else if (e instanceof InvalidApplicationMasterRequestException) {
       // halt the AM
       LOG_YARN.error("AMRMClientAsync.onError() received {}", e, e);
@@ -2165,7 +2173,9 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
       // cluster flex failure: log
       log.error("Failed to flex cluster nodes: {}", e, e);
       // then what? exit
-      queue(new ActionStopSlider(e));
+      ActionStopSlider stopSlider = new ActionStopSlider(e);
+      stopSlider.setExitReason(SliderExitReason.SLIDER_AM_ERROR);
+      queue(stopSlider);
     }
   }
 
@@ -2217,6 +2227,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
             mappedProcessExitCode,
             FinalApplicationStatus.FAILED,
             reason);
+        stop.setExitReason(SliderExitReason.YARN_ERROR);
         //this wasn't expected: the process finished early
         spawnedProcessExitedBeforeShutdownTriggered = true;
         log.info(
@@ -2392,9 +2403,11 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
       if (exception instanceof ExitCodeProvider) {
         exitCode = ((ExitCodeProvider) exception).getExitCode();
       }
-      signalAMComplete(
-          new ActionStopSlider("stop", exitCode, FinalApplicationStatus.FAILED,
-              SliderUtils.extractFirstLine(exception.getLocalizedMessage())));
+      ActionStopSlider stopSlider = new ActionStopSlider("stop", exitCode,
+          FinalApplicationStatus.FAILED,
+          SliderUtils.extractFirstLine(exception.getLocalizedMessage()));
+      stopSlider.setExitReason(SliderExitReason.SLIDER_AM_ERROR);
+      signalAMComplete(stopSlider);
     }
   }
 
@@ -2451,6 +2464,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
           LauncherExitCodes.EXIT_FALSE,
           FinalApplicationStatus.FAILED,
           E_TRIGGERED_LAUNCH_FAILURE);
+      stop.setExitReason(SliderExitReason.CHAOS_MONKEY);
       queue(stop);
     }
     

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
index 055cea5..6d9e466 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java
@@ -19,6 +19,7 @@
 package org.apache.slider.server.appmaster.actions;
 
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.slider.api.SliderExitReason;
 import org.apache.slider.core.exceptions.ExceptionConverter;
 import org.apache.slider.core.exceptions.TriggerClusterTeardownException;
 import org.apache.slider.core.main.ExitCodeProvider;
@@ -37,6 +38,7 @@ public class ActionStopSlider extends AsyncAction {
   private FinalApplicationStatus finalApplicationStatus;
   private String message;
   private final Exception ex;
+  private SliderExitReason exitReason;
 
   /**
    * Simple constructor
@@ -159,4 +161,12 @@ public class ActionStopSlider extends AsyncAction {
   public Exception getEx() {
     return ex;
   }
+
+  public SliderExitReason getExitReason() {
+    return exitReason;
+  }
+
+  public void setExitReason(SliderExitReason exitReason) {
+    this.exitReason = exitReason;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
index fda23aa..1496ed0 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.api.SliderClusterProtocol;
+import org.apache.slider.api.SliderExitReason;
 import org.apache.slider.api.proto.Messages;
 import org.apache.slider.api.types.ApplicationLivenessInformation;
 import org.apache.slider.api.types.ComponentInformation;
@@ -184,6 +185,7 @@ public class SliderIPCService extends AbstractService
             LauncherExitCodes.EXIT_SUCCESS,
             FinalApplicationStatus.SUCCEEDED,
             message);
+    stopSlider.setExitReason(SliderExitReason.STOP_COMMAND_ISSUED);
     log.info("SliderAppMasterApi.stopCluster: {}", stopSlider);
     schedule(stopSlider);
     return Messages.StopClusterResponseProto.getDefaultInstance();

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java
index 544f589..02e2295 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/actions/RestActionStop.java
@@ -19,6 +19,7 @@
 package org.apache.slider.server.appmaster.web.rest.application.actions;
 
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.slider.api.SliderExitReason;
 import org.apache.slider.core.main.LauncherExitCodes;
 import org.apache.slider.server.appmaster.actions.ActionStopSlider;
 import org.apache.slider.server.appmaster.web.WebAppApi;
@@ -59,6 +60,7 @@ public class RestActionStop {
             LauncherExitCodes.EXIT_SUCCESS,
             FinalApplicationStatus.SUCCEEDED,
             text);
+    stopSlider.setExitReason(SliderExitReason.STOP_COMMAND_ISSUED);
     log.info("SliderAppMasterApi.stopCluster: {}", stopSlider);
     slider.getQueues().schedule(stopSlider);
     

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/134ef53f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
index faeb0a1..e8e2791 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
@@ -22,12 +22,15 @@ import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
 import org.apache.hadoop.yarn.api.records.YarnApplicationState
 import org.apache.slider.api.ClusterDescription
+import org.apache.slider.api.SliderExitReason;
 import org.apache.slider.api.StatusKeys
+import org.apache.slider.api.types.ApplicationDiagnostics;
 import org.apache.slider.client.SliderClient
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.SliderXmlConfKeys
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
+import org.apache.slider.core.launch.SerializedApplicationReport
 import org.apache.slider.funtest.ResourcePaths
 import org.apache.slider.funtest.framework.AgentCommandTestBase
 import org.apache.slider.funtest.framework.FuntestProperties
@@ -145,6 +148,12 @@ public class AgentClusterLifecycleIT extends AgentCommandTestBase
       // should be in finished state, as this was a clean shutdown
       assertInYarnState(appId, YarnApplicationState.FINISHED)
 
+      // Get diagnostics and validate that exitReason is STOP_COMMAND_ISSUED
+      SerializedApplicationReport appReport = lookupApplication(appId)
+      log.info("Application Report {}", appReport);
+      ApplicationDiagnostics appDiagnostics = ApplicationDiagnostics.fromJson(appReport.diagnostics)
+      assert appDiagnostics.exitReason == SliderExitReason.STOP_COMMAND_ISSUED
+
       //cluster exists if you don't want it to be live
       exists(0, CLUSTER, false)
       //condition returns false if it is required to be live