You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by st...@apache.org on 2014/10/31 15:25:18 UTC

[10/13] git commit: SLIDER-570 migrate tests to app-id based startup cycle

SLIDER-570 migrate tests to app-id based startup cycle


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/e7df654f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/e7df654f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/e7df654f

Branch: refs/heads/develop
Commit: e7df654f29e414b822a93738321940dc8f164114
Parents: 1cd37f5
Author: Steve Loughran <st...@apache.org>
Authored: Thu Oct 30 22:40:14 2014 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Fri Oct 31 11:07:50 2014 +0000

----------------------------------------------------------------------
 .../apache/slider/core/conf/MapOperations.java  |  14 +-
 .../server/appmaster/SliderAppMaster.java       |   2 +
 .../slider/core/conf/TestConfTreeResolve.groovy |  22 ++-
 .../slider/core/conf/examples/internal.json     |   5 +-
 .../funtest/framework/CommandTestBase.groovy    | 193 ++++++++++++++-----
 .../funtest/lifecycle/AMFailuresIT.groovy       |  16 +-
 .../lifecycle/AgentClusterLifecycleIT.groovy    |   2 +-
 .../funtest/lifecycle/AgentFailures2IT.groovy   |  16 +-
 .../funtest/lifecycle/AgentFailuresIT.groovy    |  16 +-
 .../lifecycle/AgentLaunchFailureIT.groovy       |  34 ++--
 .../funtest/lifecycle/AgentRegistryIT.groovy    |  16 +-
 .../funtest/lifecycle/AppsThroughAgentIT.groovy |  15 +-
 .../AppsThroughAgentQueueAndLabelsIT.groovy     |  22 ++-
 13 files changed, 259 insertions(+), 114 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java b/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
index de8fc2c..5f7b5f0 100644
--- a/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
+++ b/slider-core/src/main/java/org/apache/slider/core/conf/MapOperations.java
@@ -38,6 +38,10 @@ import java.util.Set;
 public class MapOperations implements Map<String, String> {
   private static final Logger log =
     LoggerFactory.getLogger(MapOperations.class);
+  public static final String DAYS = ".days";
+  public static final String HOURS = ".hours";
+  public static final String MINUTES = ".minutes";
+  public static final String SECONDS = ".seconds";
 
   /**
    * Global options
@@ -277,7 +281,7 @@ public class MapOperations implements Map<String, String> {
 
   /**
    * Get the time range of a set of keys
-   * @param basekey
+   * @param basekey base key to which suffix gets applied
    * @param defDays
    * @param defHours
    * @param defMins
@@ -290,11 +294,11 @@ public class MapOperations implements Map<String, String> {
       int defMins,
       int defSecs) {
     Preconditions.checkArgument(basekey != null);
-    int days = getOptionInt(basekey + ".days", defDays);
-    int hours = getOptionInt(basekey + ".hours", defHours);
+    int days = getOptionInt(basekey + DAYS, defDays);
+    int hours = getOptionInt(basekey + HOURS, defHours);
 
-    int minutes = getOptionInt(basekey + ".minutes", defMins);
-    int seconds = getOptionInt(basekey + ".seconds", defSecs);
+    int minutes = getOptionInt(basekey + MINUTES, defMins);
+    int seconds = getOptionInt(basekey + SECONDS, defSecs);
     // range check
     Preconditions.checkState(days >= 0 && hours >= 0 && minutes >= 0
                              && seconds >= 0,

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index 5e2ba11..f2ea00f 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -2199,12 +2199,14 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
         InternalKeys.CHAOS_MONKEY_PROBABILITY_AM_LAUNCH_FAILURE,
         0);
     if (amLaunchFailProbability> 0 && monkey.chaosCheck(amLaunchFailProbability)) {
+      log.info("Chaos Monkey has triggered AM Launch failure");
       // trigger a failure
       ActionStopSlider stop = new ActionStopSlider("stop",
           0, TimeUnit.SECONDS,
           LauncherExitCodes.EXIT_FALSE,
           FinalApplicationStatus.FAILED,
           E_TRIGGERED_LAUNCH_FAILURE);
+      queue(stop);
     }
     
     int amKillProbability = internals.getOptionInt(

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-core/src/test/groovy/org/apache/slider/core/conf/TestConfTreeResolve.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/core/conf/TestConfTreeResolve.groovy b/slider-core/src/test/groovy/org/apache/slider/core/conf/TestConfTreeResolve.groovy
index b655be8..156ae71 100644
--- a/slider-core/src/test/groovy/org/apache/slider/core/conf/TestConfTreeResolve.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/core/conf/TestConfTreeResolve.groovy
@@ -19,10 +19,11 @@
 package org.apache.slider.core.conf
 
 import groovy.util.logging.Slf4j
+import static org.apache.slider.api.InternalKeys.*
 import org.junit.Assert
 import org.junit.Test
 
-import static org.apache.slider.core.conf.ExampleConfResources.overridden
+import static org.apache.slider.core.conf.ExampleConfResources.*
 
 /**
  * Test 
@@ -89,4 +90,23 @@ class TestConfTreeResolve extends Assert {
     assert worker["timeout"] == "1000"
 
   }
+
+  @Test
+  public void testTimeIntervalLoading() throws Throwable {
+
+    def orig = ExampleConfResources.loadResource(internal)
+
+    MapOperations internals = new MapOperations(orig.global)
+    def s = internals.getOptionInt(
+        CHAOS_MONKEY_INTERVAL + MapOperations.SECONDS,
+        0)
+    assert s == 60
+    long monkeyInterval = internals.getTimeRange(
+        CHAOS_MONKEY_INTERVAL,
+        DEFAULT_CHAOS_MONKEY_INTERVAL_DAYS,
+        DEFAULT_CHAOS_MONKEY_INTERVAL_HOURS,
+        DEFAULT_CHAOS_MONKEY_INTERVAL_MINUTES,
+        0);
+    assert monkeyInterval == 60;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-core/src/test/resources/org/apache/slider/core/conf/examples/internal.json
----------------------------------------------------------------------
diff --git a/slider-core/src/test/resources/org/apache/slider/core/conf/examples/internal.json b/slider-core/src/test/resources/org/apache/slider/core/conf/examples/internal.json
index 8617d1f..4c782fb 100644
--- a/slider-core/src/test/resources/org/apache/slider/core/conf/examples/internal.json
+++ b/slider-core/src/test/resources/org/apache/slider/core/conf/examples/internal.json
@@ -7,7 +7,10 @@
   "global": {
     "application.name": "small_cluster",
     "application.type": "hbase",
-    "application": "hdfs://cluster:8020/apps/hbase/v/1.0.0/application.tar"
+    "application": "hdfs://cluster:8020/apps/hbase/v/1.0.0/application.tar",
+    "internal.chaos.monkey.probability.amlaunchfailure": "10000",
+    "internal.chaos.monkey.interval.seconds": "60",
+    "internal.chaos.monkey.enabled": "true"
   },
   "components": {
 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
index 7c479af..0a7b295 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
@@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.slider.api.StatusKeys
 import org.apache.slider.common.tools.ConfigHelper
+import org.apache.slider.common.tools.Duration
 import org.apache.slider.core.launch.SerializedApplicationReport
 import org.apache.slider.core.main.ServiceLauncher
 import org.apache.slider.common.SliderKeys
@@ -90,7 +91,12 @@ abstract class CommandTestBase extends SliderTestUtils {
    * not need to be escaped
    */
   public static final String TILDE
-  
+  public static final int CONTAINER_LAUNCH_TIMEOUT = 90000
+  public static final int PROBE_SLEEP_TIME = 4000
+  public static final int REGISTRY_STARTUP_TIMEOUT = 60000
+  public static
+  final String E_LAUNCH_FAIL = 'Application did not start'
+
   /*
   Static initializer for test configurations. If this code throws exceptions
   (which it may) the class will not be instantiable.
@@ -710,6 +716,16 @@ abstract class CommandTestBase extends SliderTestUtils {
     }    
     return null;
   }  
+   
+  // Load the application report from a launch report file, throwing
+  // FileNotFoundException if the file is missing or empty.
+  public static SerializedApplicationReport loadAppReport(File reportFile) {
+    if (!reportFile.exists() || reportFile.length() <= 0) {
+      throw new FileNotFoundException(reportFile.absolutePath)
+    }
+    def serDeser = new ApplicationReportSerDeser()
+    return serDeser.fromFile(reportFile)
+  }
   
   public static SerializedApplicationReport maybeLookupFromLaunchReport(File launchReport) {
     def report = maybeLoadAppReport(launchReport)
@@ -776,9 +792,9 @@ abstract class CommandTestBase extends SliderTestUtils {
   }
 
   protected void ensureRegistryCallSucceeds(String application) {
-    repeatUntilTrue(this.&isRegistryAccessible,
-        10,
-        5 * 1000,
+    repeatUntilSuccess(this.&isRegistryAccessible,
+        REGISTRY_STARTUP_TIMEOUT,
+        PROBE_SLEEP_TIME,
         [application: application],
         true,
         'Application registry is not accessible, failing test.') {
@@ -789,35 +805,36 @@ abstract class CommandTestBase extends SliderTestUtils {
 
    
   protected void ensureApplicationIsUp(String application) {
-    repeatUntilTrue(this.&isApplicationRunning,
-        30,
+    repeatUntilSuccess(this.&isApplicationRunning,
         SLIDER_CONFIG.getInt(KEY_TEST_INSTANCE_LAUNCH_TIME,
-            DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS),
+            DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS) * 1000,
+        PROBE_SLEEP_TIME,
         [application: application],
         true,
-        'Application did not start, failing test.') {
+        E_LAUNCH_FAIL) {
       describe "final state of app that tests say is not up"
       exists(application, true).dumpOutput()
     }
   }
 
-  protected boolean isRegistryAccessible(Map<String, String> args) {
+  protected Outcome isRegistryAccessible(Map<String, String> args) {
     String applicationName = args['application'];
     SliderShell shell = slider(
         [
             ACTION_REGISTRY,
             ARG_NAME,
             applicationName,
-            ARG_LISTEXP])
+            ARG_LISTEXP
+        ])
     if (EXIT_SUCCESS != shell.execute()) {
       logShell(shell)
     }
-    return EXIT_SUCCESS == shell.execute()
+    return Outcome.fromBool(EXIT_SUCCESS == shell.execute())
   }
 
-  protected boolean isApplicationRunning(Map<String, String> args) {
+  protected Outcome isApplicationRunning(Map<String, String> args) {
     String applicationName = args['application'];
-    return isApplicationUp(applicationName);
+    return Outcome.fromBool(isApplicationUp(applicationName))
   }
 
   protected boolean isApplicationUp(String applicationName) {
@@ -827,18 +844,6 @@ abstract class CommandTestBase extends SliderTestUtils {
     );
   }
 
-  protected void ensureYarnApplicationIsUp(String application) {
-    repeatUntilTrue(this.&isApplicationRunning,
-        30,
-        SLIDER_CONFIG.getInt(KEY_TEST_INSTANCE_LAUNCH_TIME,
-            DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS),
-        [application: application],
-        true,
-        'Application did not start, failing test.') {
-      describe "final state of app that tests say is not up"
-      exists(application, true).dumpOutput()
-    }
-  }
   
   /**
    * is an application in a desired yarn state 
@@ -853,27 +858,111 @@ abstract class CommandTestBase extends SliderTestUtils {
       [ACTION_EXISTS, applicationName, ARG_STATE, yarnState.toString()])
     return shell.ret == 0
   }
-  
+
+
+  protected Outcome isYarnApplicationRunning(Map<String, String> args) {
+    // probe adapter: unpack the application ID from the argument map
+    return isYarnApplicationRunning(args['applicationId'])
+  }
+
   /**
    * is a yarn application in a desired yarn state 
    * @param yarnState
    * @param applicationName
-   * @return
+   * @return an outcome indicating whether the app is at the state, on its way
+   * or has gone past
    */
-  public static boolean isYarnApplicationInState(
-      String applicationId,
-      YarnApplicationState yarnState) {
+  public static Outcome isYarnApplicationRunning(String applicationId) {
+    switch (lookupYarnAppState(applicationId)) {
+      case YarnApplicationState.RUNNING:
+        return Outcome.Success
+      case YarnApplicationState.FINISHED:
+      case YarnApplicationState.FAILED:
+      case YarnApplicationState.KILLED:
+        // terminal state: app can never reach RUNNING, so fail fast
+        return Outcome.Fail
+      default:
+        // not yet RUNNING (e.g. ACCEPTED): keep polling
+        return Outcome.Retry
+    }
+  }
+
+  public static YarnApplicationState lookupYarnAppState(String applicationId) {
     def sar = lookupApplication(applicationId)
     assert sar != null;
-    return yarnState.toString() == sar.state
+    YarnApplicationState appState = YarnApplicationState.valueOf(sar.state)
+    return appState
+  }
+
+  public static void assertInYarnState(String applicationId, YarnApplicationState expectedState) {
+    def applicationReport = lookupApplication(applicationId)
+    assert applicationReport != null, "No application report found for $applicationId"
+    assert expectedState.toString() == applicationReport.state
+  }
 
+  /**
+   * Wait for the YARN app named in a launch report to come up, failing fast.
+   * @param launchReportFile launch time file containing app id
+   * @return the app ID
+   */
+  protected String ensureYarnApplicationIsUp(File launchReportFile) {
+    String appId = loadAppReport(launchReportFile).applicationId
+    ensureYarnApplicationIsUp(appId)
+    return appId
+  }
+  /**
+   * Wait for the YARN app to come up. This will fail fast
+   * @param applicationId ID of the YARN application to wait for
+   */
+  protected void ensureYarnApplicationIsUp(String applicationId) {
+    repeatUntilSuccess(this.&isYarnApplicationRunning,
+        // configured launch time is in seconds; repeatUntilSuccess wants millis
+        SLIDER_CONFIG.getInt(KEY_TEST_INSTANCE_LAUNCH_TIME,
+            DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS) * 1000,
+        PROBE_SLEEP_TIME,
+        [applicationId: applicationId],
+        true,
+        E_LAUNCH_FAIL) {
+      describe "final state of app that tests say is not up"
+      def sar = lookupApplication(applicationId)
+      def message = E_LAUNCH_FAIL + "\n$sar"
+      log.error(message)
+      fail(message)
+    }
+  }
+
+  /**
+   * Outcome for probes: Success, Retry (not yet, probe again) or Fail (give up)
+   */
+  static class Outcome {
+
+    public final String name;
+
+    private Outcome(String name) {
+      this.name = name
+    }
+
+    // shared singletons; final so a stray assignment cannot break the
+    // equality checks made by the code consuming probe outcomes
+    static final Outcome Success = new Outcome("Success")
+    static final Outcome Retry = new Outcome("Retry")
+    static final Outcome Fail = new Outcome("Fail")
+
+    /**
+     * build from a bool, where false is mapped to retry
+     * @param b boolean
+     * @return an outcome
+     */
+    static Outcome fromBool(boolean b) {
+      return b? Success: Retry;
+    }
+  }
   
   /**
    * Repeat a probe until it succeeds, if it does not execute a failure
    * closure then raise an exception with the supplied message
    * @param probe probe
-   * @param maxAttempts max number of attempts
+   * @param timeout time in millis before giving up
    * @param sleepDur sleep between failing attempts
    * @param args map of arguments to the probe
    * @param failIfUnsuccessful if the probe fails after all the attempts
@@ -881,23 +970,35 @@ abstract class CommandTestBase extends SliderTestUtils {
    * @param failureMessage message to include in exception raised
    * @param failureHandler closure to invoke prior to the failure being raised
    */
-  protected void repeatUntilTrue(Closure probe,
-      int maxAttempts, int sleepDur, Map args,
-      boolean failIfUnsuccessful = false,
+  protected void repeatUntilSuccess(Closure probe,
+      int timeout, int sleepDur,
+      Map args,
+      boolean failIfUnsuccessful,
       String failureMessage,
       Closure failureHandler) {
     int attemptCount = 0
     boolean succeeded = false;
-    while (attemptCount < maxAttempts) {
-      if (probe(args)) {
-        // finished
+    boolean completed = false;
+    Duration duration = new Duration(timeout)
+    duration.start();
+    while (!completed) {
+      Outcome outcome = (Outcome) probe(args)
+      if (outcome.equals(Outcome.Success)) {
+        // success
         log.debug("Success after $attemptCount attempt(s)")
         succeeded = true;
-        break
-      };
-      attemptCount++;
-
-      sleep(sleepDur)
+        completed = true;
+      } else if (outcome.equals(Outcome.Retry)) {
+        // failed but retry possible
+        attemptCount++;
+        completed = duration.limitExceeded
+        if (!completed) {
+          sleep(sleepDur)
+        }
+      } else if (outcome.equals(Outcome.Fail)) {
+        // fast fail
+          completed = true;
+      }
     }
     
     if (failIfUnsuccessful & !succeeded) {
@@ -962,10 +1063,10 @@ abstract class CommandTestBase extends SliderTestUtils {
 
   void expectContainerRequestedCountReached(String application, String role, int limit) {
 
-    repeatUntilTrue(
+    repeatUntilSuccess(
         this.&hasRequestedContainerCountReached,
-        90,
-        1000,
+        CONTAINER_LAUNCH_TIMEOUT,
+        PROBE_SLEEP_TIME,
         [limit      : Integer.toString(limit),
          role       : role,
          application: application],

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy
index 750a3d4..2e28c84 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AMFailuresIT.groovy
@@ -61,12 +61,15 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
   @Test
   public void testAMKilledWithStateAMStartedAgentsStarted() throws Throwable {
     cleanup(APPLICATION_NAME)
+    File launchReportFile = createAppReportFile();
+
     SliderShell shell = createTemplatedSliderApplication(
-        APPLICATION_NAME, APP_TEMPLATE, APP_RESOURCE
-    )
+        APPLICATION_NAME, APP_TEMPLATE, APP_RESOURCE,
+        [],
+        launchReportFile)
     logShell(shell)
 
-    ensureApplicationIsUp(APPLICATION_NAME)
+    def appId = ensureYarnApplicationIsUp(launchReportFile)
     expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1)
     
     // Wait for 20 secs for AM and agent to both reach STARTED state
@@ -92,13 +95,12 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
     killAMUsingVagrantShell()
 
     // Check that the application is not running (and is in ACCEPTED state)
-    assert isApplicationInState(APPLICATION_NAME, YarnApplicationState.ACCEPTED
-    ), 
+    assert lookupYarnAppState(appId) == YarnApplicationState.ACCEPTED ,
       'App should be in ACCEPTED state (since AM got killed)'
     log.info("After AM KILL: application {} is in ACCEPTED state", APPLICATION_NAME)
 
     // Wait until AM comes back up and verify container count again
-    ensureApplicationIsUp(APPLICATION_NAME)
+    ensureYarnApplicationIsUp(appId)
 
     // There should be exactly 1 live logger container
     def cd2 = expectContainersLive(APPLICATION_NAME, COMMAND_LOGGER, 1)
@@ -107,7 +109,7 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
     def loggerStats2 = cd2.statistics[COMMAND_LOGGER]
     assert origRequested == loggerStats2["containers.requested"],
         'No new agent containers should be requested'
-    assert isApplicationUp(APPLICATION_NAME), 'App is not running.'
+    assert lookupYarnAppState(appId) == YarnApplicationState.RUNNING 
   }
 
 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
index 3783a37..3e5cec7 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
@@ -78,7 +78,7 @@ public class AgentClusterLifecycleIT extends AgentCommandTestBase
     def launchReport = maybeLoadAppReport(launchReportFile)
     assert launchReport;
 
-    ensureApplicationIsUp(CLUSTER)
+    def appId = ensureYarnApplicationIsUp(launchReportFile)
 
     //at this point the cluster should exist.
     assertPathExists(clusterFS, "Cluster parent directory does not exist", clusterpath.parent)

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
index 9b35fa4..6c6b52b 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailures2IT.groovy
@@ -20,6 +20,7 @@ package org.apache.slider.funtest.lifecycle
 
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.YarnApplicationState
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
@@ -50,21 +51,20 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
     assumeAgentTestsEnabled()
     
     cleanup(APPLICATION_NAME)
-    SliderShell shell = createTemplatedSliderApplication(
-        APPLICATION_NAME,
-        APP_TEMPLATE3,
-        APP_RESOURCE)
+    File launchReportFile = createAppReportFile();
 
+    SliderShell shell = createTemplatedSliderApplication(
+        APPLICATION_NAME, APP_TEMPLATE3, APP_RESOURCE,
+        [],
+        launchReportFile)
     logShell(shell)
 
-    ensureApplicationIsUp(APPLICATION_NAME)
+    def appId = ensureYarnApplicationIsUp(launchReportFile)
     expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 3)
     sleep(1000 * 20)
     def cd = execStatus(APPLICATION_NAME)
     assert cd.statistics[COMMAND_LOGGER]["containers.requested"] >= 3
-
-    assert isApplicationUp(APPLICATION_NAME), 'App is not running.'
-
+    assertInYarnState(appId, YarnApplicationState.RUNNING)
   }
 
 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
index 3847e3f..d5be0f8 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
@@ -20,11 +20,13 @@ package org.apache.slider.funtest.lifecycle
 
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.YarnApplicationState
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
 import org.apache.slider.funtest.framework.AgentCommandTestBase
 import org.apache.slider.funtest.framework.FuntestProperties
+import org.apache.slider.funtest.framework.SliderShell
 import org.junit.After
 import org.junit.Test
 
@@ -50,20 +52,24 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
     }
 
     cleanup(APPLICATION_NAME)
-    def shell = createTemplatedSliderApplication(APPLICATION_NAME,
-        APP_TEMPLATE2,
-        APP_RESOURCE)
 
+    File launchReportFile = createAppReportFile();
+    SliderShell shell = createTemplatedSliderApplication(
+        APPLICATION_NAME,
+        APP_TEMPLATE2,
+        APP_RESOURCE,
+        [],
+        launchReportFile)
     logShell(shell)
 
-    ensureApplicationIsUp(APPLICATION_NAME)
+    def appId = ensureYarnApplicationIsUp(launchReportFile)
 
     expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 2)
     sleep(1000 * 20)
     assert isApplicationUp(APPLICATION_NAME), 'App is not running.'
     def cd = expectContainersLive(APPLICATION_NAME, COMMAND_LOGGER, 1)
     assert cd.statistics[COMMAND_LOGGER]["containers.requested"] >= 2
-    assert isApplicationUp(APPLICATION_NAME), 'App is not running.'
+    assertInYarnState(appId, YarnApplicationState.RUNNING)
   }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy
index 4dcbed0..5087c06 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentLaunchFailureIT.groovy
@@ -20,6 +20,8 @@ package org.apache.slider.funtest.lifecycle
 
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
+import org.apache.slider.server.appmaster.SliderAppMaster
+
 import static org.apache.slider.api.InternalKeys.*
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.params.Arguments
@@ -65,33 +67,33 @@ public class AgentLaunchFailureIT extends AgentCommandTestBase
         APP_RESOURCE2,
         [
             ARG_OPTION, CHAOS_MONKEY_ENABLED, "true",
+            ARG_OPTION, CHAOS_MONKEY_INTERVAL_SECONDS, "60",
             ARG_OPTION, CHAOS_MONKEY_PROBABILITY_AM_LAUNCH_FAILURE, 
              Integer.toString(PROBABILITY_PERCENT_100),
         ],
         launchReportFile)
 
-    shell.dumpOutput();
     assert launchReportFile.exists()
     assert launchReportFile.size() > 0
     def launchReport = maybeLoadAppReport(launchReportFile)
     assert launchReport;
     assert launchReport.applicationId;
-    def report = maybeLookupFromLaunchReport(launchReportFile)
-    assert report;
-    ensureApplicationIsUp(CLUSTER)
-
-    //stop
-    freeze(0, CLUSTER,
-        [
-            ARG_FORCE,
-            ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
-            ARG_MESSAGE, "final-shutdown"
-        ])
-
-    destroy(0, CLUSTER)
 
-    //cluster now missing
-    exists(EXIT_UNKNOWN_INSTANCE, CLUSTER)
+    // spin expecting failure
+    def appId = launchReport.applicationId
+    sleep(5000)
+    describe("Awaiting failure")
+    try {
+      ensureYarnApplicationIsUp(appId)
+      fail("application is up")
+    } catch (AssertionError e) {
+      if(!e.toString().contains(SliderAppMaster.E_TRIGGERED_LAUNCH_FAILURE)) {
+        throw e;
+      }
+    }
+    def sar = lookupApplication(appId)
+    log.info(sar.toString())
+    assert sar.diagnostics.contains(SliderAppMaster.E_TRIGGERED_LAUNCH_FAILURE)
 
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
index 16e65fa..b5ee23d 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentRegistryIT.groovy
@@ -23,6 +23,7 @@ import groovy.util.logging.Slf4j
 import org.apache.hadoop.registry.client.binding.RegistryUtils
 import org.apache.hadoop.registry.client.types.Endpoint
 import org.apache.hadoop.registry.client.types.ServiceRecord
+import org.apache.hadoop.yarn.api.records.YarnApplicationState
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.SliderKeys
 import org.apache.slider.common.params.Arguments
@@ -59,19 +60,17 @@ public class AgentRegistryIT extends AgentCommandTestBase
   @Test
   public void testAgentRegistry() throws Throwable {
     describe("Create a 0-role cluster and make registry queries against it")
-
-    // sanity check to verify the config is correct
-    assert clusterFS.uri.scheme != "file"
-
     def clusterpath = buildClusterPath(CLUSTER)
-    assert !clusterFS.exists(clusterpath)
+    File launchReportFile = createAppReportFile();
     SliderShell shell = createTemplatedSliderApplication(CLUSTER,
         APP_TEMPLATE,
-        APP_RESOURCE2)
+        APP_RESOURCE2,
+        [],
+        launchReportFile)
 
     logShell(shell)
 
-    ensureApplicationIsUp(CLUSTER)
+    def appId = ensureYarnApplicationIsUp(launchReportFile)
 
     //at this point the cluster should exist.
     assertPathExists(
@@ -124,15 +123,16 @@ public class AgentRegistryIT extends AgentCommandTestBase
     //stop
     freeze(0, CLUSTER,
         [
-            ARG_FORCE,
             ARG_WAIT, Integer.toString(FREEZE_WAIT_TIME),
             ARG_MESSAGE, "final-shutdown"
         ])
 
+    assertInYarnState(appId, YarnApplicationState.FINISHED)
     destroy(0, CLUSTER)
 
     //cluster now missing
     exists(EXIT_UNKNOWN_INSTANCE, CLUSTER)
 
+
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
index 488173b..5a5b964 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentIT.groovy
@@ -20,6 +20,7 @@ package org.apache.slider.funtest.lifecycle
 
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.YarnApplicationState
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
@@ -53,13 +54,15 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
     assumeAgentTestsEnabled()
 
     cleanup(APPLICATION_NAME)
+    File launchReportFile = createAppReportFile();
     SliderShell shell = createTemplatedSliderApplication(APPLICATION_NAME,
         APP_TEMPLATE,
-        APP_RESOURCE)
-
+        APP_RESOURCE,
+        [],
+        launchReportFile)
     logShell(shell)
 
-    ensureApplicationIsUp(APPLICATION_NAME)
+    def appId = ensureYarnApplicationIsUp(launchReportFile)
 
     //flex
     slider(EXIT_SUCCESS,
@@ -107,8 +110,8 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
             APPLICATION_NAME,
             ARG_GETEXP,
             "container_log_dirs"])
-    if(!containsString(shell, "\"tag\" : \"COMMAND_LOGGER\"", 2)
-    || !containsString(shell, "\"level\" : \"component\"", 2)) {
+    if (!containsString(shell, "\"tag\" : \"COMMAND_LOGGER\"", 2)
+        || !containsString(shell, "\"level\" : \"component\"", 2)) {
       logShell(shell)
       assert fail("Should list 2 entries for log folders")
     }
@@ -159,6 +162,6 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
       assert fail("Should have exported cl-site")
     }
 
-    assert isApplicationUp(APPLICATION_NAME), 'App is not running.'
+    assertInYarnState(appId,  YarnApplicationState.RUNNING)
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/e7df654f/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentQueueAndLabelsIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentQueueAndLabelsIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentQueueAndLabelsIT.groovy
index ec999f5..ee418dc 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentQueueAndLabelsIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AppsThroughAgentQueueAndLabelsIT.groovy
@@ -20,6 +20,7 @@ package org.apache.slider.funtest.lifecycle
 
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.YarnApplicationState
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
@@ -80,15 +81,16 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
     assumeLabelsRedAndBlueAdded()
 
     cleanup(APPLICATION_NAME)
-    SliderShell shell = createTemplatedSliderApplication(APPLICATION_NAME,
+    File launchReportFile = createAppReportFile();
+    SliderShell shell = createTemplatedSliderApplication(
+        APPLICATION_NAME,
         APP_TEMPLATE,
         APP_RESOURCE4,
-        [ARG_QUEUE, TARGET_QUEUE]
-    )
-
+        [ARG_QUEUE, TARGET_QUEUE],
+        launchReportFile)
     logShell(shell)
 
-    ensureApplicationIsUp(APPLICATION_NAME)
+    def appId = ensureYarnApplicationIsUp(launchReportFile)
 
     expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 1 )
     expectContainersLive(APPLICATION_NAME, COMMAND_LOGGER, 1)
@@ -100,18 +102,18 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
             APPLICATION_NAME,
             ARG_COMPONENT,
             COMMAND_LOGGER,
-            "3"])
+            "3"
+        ])
 
-    // sleep till the new instance starts
-    ensureApplicationIsUp(APPLICATION_NAME)
+    // spin till the flexed instance starts
+    ensureYarnApplicationIsUp(appId)
     expectContainerRequestedCountReached(APPLICATION_NAME, COMMAND_LOGGER, 3)
 
 
     sleep(1000 * 20)
     def cd = execStatus(APPLICATION_NAME)
     assert cd.statistics[COMMAND_LOGGER]["containers.requested"] >= 3
-
-    assert isApplicationUp(APPLICATION_NAME), 'App is not running.'
+    assertInYarnState(appId, YarnApplicationState.RUNNING)
   }