You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by st...@apache.org on 2014/10/23 12:40:46 UTC
[07/10] git commit: SLIDER-460 probes working more reliably
SLIDER-460 probes working more reliably
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/9c5ac0ff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/9c5ac0ff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/9c5ac0ff
Branch: refs/heads/develop
Commit: 9c5ac0ffa0c4ea90d50ea5cbce71278b3a6d4098
Parents: 75030d2
Author: Steve Loughran <st...@apache.org>
Authored: Thu Oct 23 10:48:42 2014 +0100
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Oct 23 11:35:55 2014 +0100
----------------------------------------------------------------------
.../framework/AgentCommandTestBase.groovy | 79 +------------
.../funtest/framework/CommandTestBase.groovy | 117 +++++++++++++++++--
.../lifecycle/AgentClusterLifecycleIT.groovy | 4 +-
.../funtest/lifecycle/AgentFailuresIT.groovy | 1 -
.../src/test/resources/log4j.properties | 2 +-
5 files changed, 113 insertions(+), 90 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
index 4a46f8b..8af51b4 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/AgentCommandTestBase.groovy
@@ -21,7 +21,6 @@ package org.apache.slider.funtest.framework
import groovy.util.logging.Slf4j
import org.apache.hadoop.fs.Path
import org.apache.hadoop.security.UserGroupInformation
-import org.apache.slider.api.ClusterDescription
import org.apache.slider.common.SliderExitCodes
import org.apache.slider.common.params.Arguments
import org.apache.slider.common.params.SliderActions
@@ -125,77 +124,6 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
}
}
- public static void logShell(SliderShell shell) {
- shell.dumpOutput();
- }
-
-
- public ClusterDescription execStatus(String application) {
- ClusterDescription cd
- File statusFile = File.createTempFile("status", ".json")
- try {
- SliderShell shell = slider(EXIT_SUCCESS,
- [
- ACTION_STATUS,
- application,
- ARG_OUTPUT, statusFile.absolutePath
- ])
-
- assert statusFile.exists()
- cd = new ClusterDescription();
- cd.fromFile(statusFile)
- return cd
- } finally {
- statusFile.delete()
- }
- }
-
- public int queryRequestedCount(String application, String role) {
- ClusterDescription cd = execStatus(application)
- int requestedCount = cd.statistics[role]["containers.requested"]
- return requestedCount
- }
-
- boolean hasRequestedContainerCountExceeded(Map<String, String> args) {
- String application = args['application']
- String role = args['role']
- int expectedCount = args['limit'].toInteger();
- return queryRequestedCount(application, role) >= expectedCount
- }
-
- void expectContainerCountExceeded(String application, String role, int limit) {
-
- repeatUntilTrue(
- this.&hasRequestedContainerCountExceeded,
- 50,
- 1000 * 10,
- [limit : Integer.toString(limit),
- role : role,
- application: application],
- true,
- "countainer count not reached") {
- status(application).dumpOutput()
- };
-
- }
- public ClusterDescription expectContainersLive(String clustername,
- String component,
- int count) {
- ClusterDescription cd = execStatus(clustername)
- assertContainersLive(cd, component, count)
- return cd;
- }
-
- public static void assertContainersLive(ClusterDescription clusterDescription,
- String component, int count) {
- log.info("Asserting component count.")
- int instanceCount = clusterDescription.instances[component].size()
- if (count != instanceCount) {
- log.warn(clusterDescription.toString())
- }
- assert count == instanceCount
- }
-
public static String findLineEntry(SliderShell shell, String[] locaters) {
int index = 0;
def output = shell.out
@@ -279,12 +207,9 @@ implements FuntestProperties, Arguments, SliderExitCodes, SliderActions {
return
}
- log.info "Cleaning app instance, if exists, by name " + applicationName
+ describe "Teardown app instance " + applicationName
+ // forced freeze with wait
teardown(applicationName)
-
- // sleep till the instance is frozen
- sleep(1000 * 3)
-
SliderShell shell = slider([
ACTION_DESTROY,
applicationName])
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
index 44d07d8..4b75c56 100644
--- a/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
+++ b/slider-funtest/src/main/groovy/org/apache/slider/funtest/framework/CommandTestBase.groovy
@@ -37,7 +37,6 @@ import org.apache.slider.api.ClusterDescription
import org.apache.slider.common.tools.SliderUtils
import org.apache.slider.client.SliderClient
import org.apache.slider.test.SliderTestUtils
-import org.junit.Assert
import org.junit.Before
import org.junit.BeforeClass
import org.junit.Rule
@@ -45,6 +44,7 @@ import org.junit.rules.Timeout
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import static org.apache.slider.common.SliderExitCodes.*
+import static org.apache.slider.core.main.LauncherExitCodes.*
import static org.apache.slider.funtest.framework.FuntestProperties.*
import static org.apache.slider.common.params.Arguments.*
import static org.apache.slider.common.params.SliderActions.*
@@ -169,6 +169,20 @@ abstract class CommandTestBase extends SliderTestUtils {
"and YARN RM @ ${SLIDER_CONFIG.get(YarnConfiguration.RM_ADDRESS)}")
}
+ public static void assertContainersLive(ClusterDescription clusterDescription,
+ String component, int count) {
+ log.info("Asserting component count.")
+ int instanceCount = clusterDescription.instances[component].size()
+ if (count != instanceCount) {
+ log.warn(clusterDescription.toString())
+ }
+ assert count == instanceCount
+ }
+
+ public static void logShell(SliderShell shell) {
+ shell.dumpOutput();
+ }
+
/**
* give the test thread a name
*/
@@ -333,7 +347,7 @@ abstract class CommandTestBase extends SliderTestUtils {
}
static SliderShell freezeForce(String name) {
- freeze(name, [ARG_FORCE])
+ freeze(name, [ARG_FORCE, ARG_WAIT, "10000"])
}
static SliderShell killContainer(String name, String containerID) {
@@ -643,9 +657,12 @@ abstract class CommandTestBase extends SliderTestUtils {
sleep(5000)
ensureApplicationIsUp(cluster)
+
+/*
def sleeptime = SLIDER_CONFIG.getInt(KEY_AM_RESTART_SLEEP_TIME,
DEFAULT_AM_RESTART_SLEEP_TIME)
sleep(sleeptime)
+*/
ClusterDescription status
status = sliderClient.clusterDescription
@@ -653,18 +670,19 @@ abstract class CommandTestBase extends SliderTestUtils {
}
protected void ensureApplicationIsUp(String application) {
- repeatUntilTrue(this.&isApplicationUp,
+ repeatUntilTrue(this.&isApplicationRunning,
SLIDER_CONFIG.getInt(KEY_TEST_INSTANCE_LAUNCH_TIME,
DEFAULT_INSTANCE_LAUNCH_TIME_SECONDS),
1000,
[application: application],
true,
'Application did not start, failing test.') {
+ describe "final state of app that tests say is not up"
exists(application,true).dumpOutput()
}
}
- protected boolean isApplicationUp(Map<String, String> args) {
+ protected boolean isApplicationRunning(Map<String, String> args) {
String applicationName = args['application'];
return isApplicationInState(YarnApplicationState.RUNNING, applicationName);
}
@@ -686,14 +704,30 @@ abstract class CommandTestBase extends SliderTestUtils {
return shell.ret == 0
}
- protected void repeatUntilTrue(Closure closure,
+ /**
+ * Repeat a probe until it succeeds or the attempts run out. If it never succeeds
+ * (and failure is enabled), run the failure closure, then fail with the supplied message
+ * @param probe probe
+ * @param maxAttempts max number of attempts
+ * @param sleepDur sleep between failing attempts
+ * @param args map of arguments to the probe
+ * @param failIfUnsuccessful whether to raise an exception if the probe
+ * has still not succeeded after all the attempts
+ * @param failureMessage message to include in exception raised
+ * @param failureHandler closure to invoke prior to the failure being raised
+ */
+ protected void repeatUntilTrue(Closure probe,
int maxAttempts, int sleepDur, Map args,
- boolean failIfUnsuccessful = false, String message,
+ boolean failIfUnsuccessful = false,
+ String failureMessage,
Closure failureHandler) {
int attemptCount = 0
+ boolean succeeded = false;
while (attemptCount < maxAttempts) {
- if (closure(args)) {
+ if (probe(args)) {
// finished
+ log.debug("Success after $attemptCount attempt(s)")
+ succeeded = true;
break
};
attemptCount++;
@@ -701,12 +735,77 @@ abstract class CommandTestBase extends SliderTestUtils {
sleep(sleepDur)
}
- if (failIfUnsuccessful & attemptCount != maxAttempts) {
+ if (failIfUnsuccessful & !succeeded) {
if (failureHandler) {
failureHandler()
}
- fail(message)
+ fail(failureMessage)
+ }
+ }
+
+ public ClusterDescription execStatus(String application) {
+ ClusterDescription cd
+ File statusFile = File.createTempFile("status", ".json")
+ try {
+ slider(EXIT_SUCCESS,
+ [
+ ACTION_STATUS,
+ application,
+ ARG_OUTPUT, statusFile.absolutePath
+ ])
+
+ assert statusFile.exists()
+ cd = new ClusterDescription();
+ cd.fromFile(statusFile)
+ return cd
+ } finally {
+ statusFile.delete()
}
}
+ public int queryRequestedCount(String application, String role) {
+ ClusterDescription cd = execStatus(application)
+
+ if (!cd.statistics[role]) {
+ return 0;
+ }
+ def statsForRole = cd.statistics[role]
+
+ def requested = statsForRole["containers.requested"]
+ assert null != statsForRole["containers.requested"]
+ int requestedCount = requested
+ return requestedCount
+ }
+
+ boolean hasRequestedContainerCountExceeded(Map<String, String> args) {
+ String application = args['application']
+ String role = args['role']
+ int expectedCount = args['limit'].toInteger();
+ return queryRequestedCount(application, role) >= expectedCount
+ }
+
+ void expectContainerCountExceeded(String application, String role, int limit) {
+
+ repeatUntilTrue(
+ this.&hasRequestedContainerCountExceeded,
+ 50,
+ 1000 * 10,
+ [limit : Integer.toString(limit),
+ role : role,
+ application: application],
+ true,
+ "countainer count not reached") {
+ describe "container count not reached"
+ status(application).dumpOutput()
+ };
+
+ }
+
+ public ClusterDescription expectContainersLive(String clustername,
+ String component,
+ int count) {
+ ClusterDescription cd = execStatus(clustername)
+ assertContainersLive(cd, component, count)
+ return cd;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
index 0a5163a..dfdbf06 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentClusterLifecycleIT.groovy
@@ -147,7 +147,7 @@ public class AgentClusterLifecycleIT extends AgentCommandTestBase
ARG_MESSAGE, "forced-freeze-in-test"
])
- describe " >>> Cluster is now frozen - 2nd time."
+ describe " >>> Cluster is now force frozen - 2nd time."
//cluster is no longer live
exists(0, CLUSTER, false)
@@ -165,9 +165,9 @@ public class AgentClusterLifecycleIT extends AgentCommandTestBase
describe " >>> Cluster is now thawed - 2nd time."
- ClusterDescription status = killAmAndWaitForRestart(sliderClient, CLUSTER)
describe " >>> Kill AM and wait for restart."
+ ClusterDescription status = killAmAndWaitForRestart(sliderClient, CLUSTER)
def restarted = status.getInfo(
StatusKeys.INFO_CONTAINERS_AM_RESTART)
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
index be8614c..ab6a811 100644
--- a/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
+++ b/slider-funtest/src/test/groovy/org/apache/slider/funtest/lifecycle/AgentFailuresIT.groovy
@@ -25,7 +25,6 @@ import org.apache.slider.common.params.Arguments
import org.apache.slider.common.params.SliderActions
import org.apache.slider.funtest.framework.AgentCommandTestBase
import org.apache.slider.funtest.framework.FuntestProperties
-import org.apache.slider.funtest.framework.SliderShell
import org.junit.After
import org.junit.Test
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/9c5ac0ff/slider-funtest/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/slider-funtest/src/test/resources/log4j.properties b/slider-funtest/src/test/resources/log4j.properties
index a552a55..65135ca 100644
--- a/slider-funtest/src/test/resources/log4j.properties
+++ b/slider-funtest/src/test/resources/log4j.properties
@@ -42,7 +42,7 @@ log4j.logger.org.apache.hadoop.hdfs.server.datanode.BlockPoolSliceScanner=WARN
log4j.logger.org.apache.hadoop.hdfs.server.blockmanagement=WARN
log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN
log4j.logger.org.apache.hadoop.hdfs=WARN
-
+log4j.logger.org.apache.hadoop.hdfs.shortcircuit=FATAL
log4j.logger.org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor=WARN
log4j.logger.org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl=WARN