You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by st...@apache.org on 2014/10/23 12:40:46 UTC

[07/10] git commit: SLIDER-460 probes working more reliably

SLIDER-460 probes working more reliably


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/9c5ac0ff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/9c5ac0ff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/9c5ac0ff

Branch: refs/heads/develop
Commit: 9c5ac0ffa0c4ea90d50ea5cbce71278b3a6d4098
Parents: 75030d2
Author: Steve Loughran <st...@apache.org>
Authored: Thu Oct 23 10:48:42 2014 +0100
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Oct 23 11:35:55 2014 +0100

----------------------------------------------------------------------
 .../framework/AgentCommandTestBase.groovy       |  79 +------------
 .../funtest/framework/CommandTestBase.groovy    | 117 +++++++++++++++++--
 .../lifecycle/AgentClusterLifecycleIT.groovy    |   4 +-
 .../funtest/lifecycle/AgentFailuresIT.groovy    |   1 -
 .../src/test/resources/log4j.properties         |   2 +-
 5 files changed, 113 insertions(+), 90 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
index 4a46f8b..8af51b4 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
@@ -21,7 +21,6 @@ package org.apache.slider.funtest.framework
 import groovy.util.logging.Slf4j
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.security.UserGroupInformation
-import org.apache.slider.api.ClusterDescription
 import org.apache.slider.common.SliderExitCodes
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
@@ -125,77 +124,6 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
     }
   }
 
-  public static void logShell(SliderShell shell) {
-    shell.dumpOutput();
-  }
-
-
-  public ClusterDescription execStatus(String application) {
-    ClusterDescription cd
-    File statusFile = File.createTempFile("status", ".json")
-    try {
-      SliderShell shell = slider(EXIT_SUCCESS,
-          [
-              ACTION_STATUS,
-              application,
-              ARG_OUTPUT, statusFile.absolutePath
-          ])
-
-      assert statusFile.exists()
-      cd = new ClusterDescription();
-      cd.fromFile(statusFile)
-      return cd
-    } finally {
-      statusFile.delete()
-    }
-  }
-
-  public int queryRequestedCount(String  application, String role) {
-    ClusterDescription cd = execStatus(application)
-    int requestedCount = cd.statistics[role]["containers.requested"]
-    return requestedCount
-  }
-
-  boolean hasRequestedContainerCountExceeded(Map<String, String> args) {
-    String application = args['application']
-    String role = args['role']
-    int expectedCount = args['limit'].toInteger();
-    return queryRequestedCount(application, role) >= expectedCount
-  }
-
-  void expectContainerCountExceeded(String application, String role, int limit) {
-
-    repeatUntilTrue(
-        this.&hasRequestedContainerCountExceeded,
-        50,
-        1000 * 10,
-        [limit      : Integer.toString(limit),
-         role       : role,
-         application: application],
-        true,
-        "countainer count not reached") {
-      status(application).dumpOutput()
-    };
-
-  }
-  public ClusterDescription expectContainersLive(String clustername,
-      String component,
-      int count) {
-    ClusterDescription cd = execStatus(clustername)
-    assertContainersLive(cd, component, count)
-    return cd;
-  }
-
-  public static void assertContainersLive(ClusterDescription clusterDescription,
-      String component, int count) {
-    log.info("Asserting component count.")
-    int instanceCount = clusterDescription.instances[component].size()
-    if (count != instanceCount) {
-      log.warn(clusterDescription.toString())
-    }
-    assert count == instanceCount 
-  }
-
   public static String findLineEntry(SliderShell shell, String[] locaters) {
     int index = 0;
     def output = shell.out
@@ -279,12 +207,9 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
       return
     }
 
-    log.info "Cleaning app instance, if exists, by name " + applicationName
+    describe "Teardown app instance " + applicationName
+    // forced freeze with wait
     teardown(applicationName)
-
-    // sleep till the instance is frozen
-    sleep(1000 * 3)
-
     SliderShell shell = slider([
         ACTION_DESTROY,
         applicationName])

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
index 44d07d8..4b75c56 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
@@ -37,7 +37,6 @@ import org.apache.slider.api.ClusterDescription
 import org.apache.slider.common.tools.SliderUtils
 import org.apache.slider.client.SliderClient
 import org.apache.slider.test.SliderTestUtils
-import org.junit.Assert
 import org.junit.Before
 import org.junit.BeforeClass
 import org.junit.Rule
@@ -45,6 +44,7 @@ import org.junit.rules.Timeout
 import org.slf4j.Logger
 import org.slf4j.LoggerFactory
 import static org.apache.slider.common.SliderExitCodes.*
+import static org.apache.slider.core.main.LauncherExitCodes.*
 import static org.apache.slider.funtest.framework.FuntestProperties.*
 import static org.apache.slider.common.params.Arguments.*
 import static org.apache.slider.common.params.SliderActions.*
@@ -169,6 +169,20 @@ abstract class CommandTestBase extends SliderTestUtils {
              "and YARN RM @ ${SLIDER_CONFIG.get(YarnConfiguration.RM_ADDRESS)}")
   }
 
+  public static void assertContainersLive(ClusterDescription clusterDescription,
+      String component, int count) {
+    log.info("Asserting component count.")
+    int instanceCount = clusterDescription.instances[component].size()
+    if (count != instanceCount) {
+      log.warn(clusterDescription.toString())
+    }
+    assert count == instanceCount 
+  }
+
+  public static void logShell(SliderShell shell) {
+    shell.dumpOutput();
+  }
+
   /**
    * give the test thread a name
    */
@@ -333,7 +347,7 @@ abstract class CommandTestBase extends SliderTestUtils {
   }
 
   static SliderShell freezeForce(String name) {
-    freeze(name, [ARG_FORCE])
+    freeze(name, [ARG_FORCE, ARG_WAIT, "10000"])
   }
 
   static SliderShell killContainer(String name, String containerID) {
@@ -643,9 +657,12 @@ abstract class CommandTestBase extends SliderTestUtils {
 
     sleep(5000)
     ensureApplicationIsUp(cluster)
+    
+/*
     def sleeptime = SLIDER_CONFIG.getInt(KEY_AM_RESTART_SLEEP_TIME,
         DEFAULT_AM_RESTART_SLEEP_TIME)
     sleep(sleeptime)
+*/
     ClusterDescription status
 
     status = sliderClient.clusterDescription
@@ -653,18 +670,19 @@ abstract class CommandTestBase extends SliderTestUtils {
   }
 
   protected void ensureApplicationIsUp(String application) {
-    repeatUntilTrue(this.&isApplicationUp,
+    repeatUntilTrue(this.&isApplicationRunning,
         SLIDER_CONFIG.getInt(KEY_TEST_INSTANCE_LAUNCH_TIME,
             DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS),
         1000,
         [application: application],
         true,
         'Application did not start, failing test.') {
+      describe "final state of app that tests say is not up"
       exists(application,true).dumpOutput()
     }
   }
 
-  protected boolean isApplicationUp(Map<String, String> args) {
+  protected boolean isApplicationRunning(Map<String, String> args) {
     String applicationName = args['application'];
     return isApplicationInState(YarnApplicationState.RUNNING, applicationName);
   }
@@ -686,14 +704,30 @@ abstract class CommandTestBase extends SliderTestUtils {
     return shell.ret == 0
   }
 
-  protected void repeatUntilTrue(Closure closure,
+  /**
+   * Repeat a probe until it succeeds; if it never succeeds, execute a failure
+   * closure and then raise an exception with the supplied message
+   * @param probe probe
+   * @param maxAttempts max number of attempts
+   * @param sleepDur sleep between failing attempts
+   * @param args map of arguments to the probe
+   * @param failIfUnsuccessful whether to raise an exception if the probe
+   * still fails after all the attempts
+   * @param failureMessage message to include in exception raised
+   * @param failureHandler closure to invoke prior to the failure being raised
+   */
+  protected void repeatUntilTrue(Closure probe,
       int maxAttempts, int sleepDur, Map args,
-      boolean failIfUnsuccessful = false, String message,
+      boolean failIfUnsuccessful = false,
+      String failureMessage,
       Closure failureHandler) {
     int attemptCount = 0
+    boolean succeeded = false;
     while (attemptCount < maxAttempts) {
-      if (closure(args)) {
+      if (probe(args)) {
         // finished
+        log.debug("Success after $attemptCount attempt(s)")
+        succeeded = true;
         break
       };
       attemptCount++;
@@ -701,12 +735,77 @@ abstract class CommandTestBase extends SliderTestUtils {
       sleep(sleepDur)
     }
     
-    if (failIfUnsuccessful & attemptCount != maxAttempts) {
+    if (failIfUnsuccessful & !succeeded) {
       if (failureHandler) {
         failureHandler()
       }
-      fail(message)
+      fail(failureMessage)
+    }
+  }
+
+  public ClusterDescription execStatus(String application) {
+    ClusterDescription cd
+    File statusFile = File.createTempFile("status", ".json")
+    try {
+      slider(EXIT_SUCCESS,
+          [
+              ACTION_STATUS,
+              application,
+              ARG_OUTPUT, statusFile.absolutePath
+          ])
+
+      assert statusFile.exists()
+      cd = new ClusterDescription();
+      cd.fromFile(statusFile)
+      return cd
+    } finally {
+      statusFile.delete()
     }
   }
 
+  public int queryRequestedCount(String  application, String role) {
+    ClusterDescription cd = execStatus(application)
+
+    if (!cd.statistics[role]) {
+      return 0;
+    }
+    def statsForRole = cd.statistics[role]
+
+    def requested = statsForRole["containers.requested"]
+    assert null != statsForRole["containers.requested"]
+    int requestedCount = requested
+    return requestedCount
+  }
+
+  boolean hasRequestedContainerCountExceeded(Map<String, String> args) {
+    String application = args['application']
+    String role = args['role']
+    int expectedCount = args['limit'].toInteger();
+    return queryRequestedCount(application, role) >= expectedCount
+  }
+
+  void expectContainerCountExceeded(String application, String role, int limit) {
+
+    repeatUntilTrue(
+        this.&hasRequestedContainerCountExceeded,
+        50,
+        1000 * 10,
+        [limit      : Integer.toString(limit),
+         role       : role,
+         application: application],
+        true,
+        "countainer count not reached") {
+      describe "container count not reached"
+      status(application).dumpOutput()
+    };
+
+  }
+
+  public ClusterDescription expectContainersLive(String clustername,
+      String component,
+      int count) {
+    ClusterDescription cd = execStatus(clustername)
+    assertContainersLive(cd, component, count)
+    return cd;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
index 0a5163a..dfdbf06 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
@@ -147,7 +147,7 @@ public class AgentClusterLifecycleIT extends AgentCommandTestBase
               ARG_MESSAGE, "forced-freeze-in-test"
           ])
 
-      describe " >>> Cluster is now frozen - 2nd time."
+      describe " >>> Cluster is now force frozen - 2nd time."
 
       //cluster is no longer live
       exists(0, CLUSTER, false)
@@ -165,9 +165,9 @@ public class AgentClusterLifecycleIT extends AgentCommandTestBase
 
       describe " >>> Cluster is now thawed - 2nd time."
 
-      ClusterDescription status = killAmAndWaitForRestart(sliderClient, CLUSTER)
 
       describe " >>> Kill AM and wait for restart."
+      ClusterDescription status = killAmAndWaitForRestart(sliderClient, CLUSTER)
 
       def restarted = status.getInfo(
           StatusKeys.INFO_CONTAINERS_AM_RESTART)

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
index be8614c..ab6a811 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
@@ -25,7 +25,6 @@ import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
 import org.apache.slider.funtest.framework.AgentCommandTestBase
 import org.apache.slider.funtest.framework.FuntestProperties
-import org.apache.slider.funtest.framework.SliderShell
 import org.junit.After
 import org.junit.Test
 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/resources/log4j.properties b/slider-funtest/src/test/resources/log4j.properties
index a552a55..65135ca 100644
--- a/slider-funtest/src/test/resources/log4j.properties
+++ b/slider-funtest/src/test/resources/log4j.properties
@@ -42,7 +42,7 @@ log4j.logger.org.apache.hadoop.hdfs.server.datanode.BlockPoolSliceScanner=WARN
 log4j.logger.org.apache.hadoop.hdfs.server.blockmanagement=WARN
 log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN
 log4j.logger.org.apache.hadoop.hdfs=WARN
-
+log4j.logger.org.apache.hadoop.hdfs.shortcircuit=FATAL
 
 log4j.logger.org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor=WARN
 log4j.logger.org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl=WARN