You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by st...@apache.org on 2015/11/12 19:15:16 UTC
[01/12] incubator-slider git commit: SLIDER-966 fix regressions shown
up by tests, mostly in test setup, but one test,
TestMockContainerResourceAllocations,
showed that resource normalization stamped on vcore requirements
Repository: incubator-slider
Updated Branches:
refs/heads/feature/SLIDER-82-pass-3.1 e0fb52916 -> 5a61b4cd8
SLIDER-966 fix regressions shown up by tests, mostly in test setup, but one test, TestMockContainerResourceAllocations, showed that resource normalization stamped on vcore requirements
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/89fd701b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/89fd701b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/89fd701b
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 89fd701bf1ab50b759babb01b075307ed6080c0d
Parents: e0fb529
Author: Steve Loughran <st...@apache.org>
Authored: Mon Nov 9 13:58:03 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Mon Nov 9 13:58:03 2015 +0000
----------------------------------------------------------------------
.../state/AbstractClusterServices.java | 24 +++++++++++-
.../slider/server/appmaster/state/AppState.java | 15 +++++++-
.../slider/client/TestClientBadArgs.groovy | 39 ++++++++++++--------
.../appmaster/model/mock/MockAppState.groovy | 5 ++-
4 files changed, 63 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/89fd701b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java
index eba8c38..54f384b 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java
@@ -18,6 +18,7 @@
package org.apache.slider.server.appmaster.state;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
@@ -25,6 +26,10 @@ import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
* Cluster services offered by the YARN infrastructure.
*/
public abstract class AbstractClusterServices {
+
+ private final DefaultResourceCalculator
+ defaultResourceCalculator = new DefaultResourceCalculator();
+
/**
* Create a resource for requests
* @return a resource which can be built up.
@@ -33,7 +38,24 @@ public abstract class AbstractClusterServices {
public abstract Resource newResource(int memory, int cores);
+ /**
+ * Normalise memory, CPU and other resources according to the YARN AM-supplied
+ * values and the resource calculator in use (currently hard-coded to the
+ * {@link DefaultResourceCalculator}).
+ * Those resources which aren't normalized (currently: CPU) are left
+ * as is.
+ * @param resource resource requirements of a role
+ * @param minR minimum values of this queue
+ * @param maxR max values of this queue
+ * @return a normalized value.
+ */
public Resource normalize(Resource resource, Resource minR, Resource maxR) {
- return new DefaultResourceCalculator().normalize(resource, minR, maxR);
+ Preconditions.checkArgument(resource != null, "null resource");
+ Preconditions.checkArgument(minR != null, "null minR");
+ Preconditions.checkArgument(maxR != null, "null maxR");
+
+ Resource normalize = defaultResourceCalculator.normalize(resource, minR,
+ maxR, minR);
+ return newResource(normalize.getMemory(), resource.getVirtualCores());
}
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/89fd701b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index 21f59a1..f74fe98 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -21,6 +21,7 @@ package org.apache.slider.server.appmaster.state;
import com.codahale.metrics.Metric;
import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.Container;
@@ -34,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.api.ClusterDescriptionKeys;
import org.apache.slider.api.ClusterDescriptionOperations;
@@ -322,6 +324,8 @@ public class AppState {
*/
public AppState(AbstractClusterServices recordFactory,
MetricsAndMonitoring metricsAndMonitoring) {
+ Preconditions.checkArgument(recordFactory != null, "null recordFactory");
+ Preconditions.checkArgument(metricsAndMonitoring != null, "null metricsAndMonitoring");
this.recordFactory = recordFactory;
this.metricsAndMonitoring = metricsAndMonitoring;
@@ -1310,7 +1314,16 @@ public class AppState {
DEF_YARN_MEMORY,
containerMaxMemory);
capability.setMemory(ram);
- return recordFactory.normalize(capability,minResource, maxResource);
+ log.debug("Component {} has RAM={}, vCores ={}", name, ram, cores);
+ Resource normalized = recordFactory.normalize(capability, minResource,
+ maxResource);
+ if (!Resources.equals(normalized, capability)) {
+ // resource requirements normalized to something other than asked for.
+ // LOG @ WARN so users can see why this is happening.
+ log.warn("Resource requirements of {} normalized" +
+ " from {} to {}", name, capability, normalized);
+ }
+ return normalized;
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/89fd701b/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy b/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy
index 12736e3..1b074dc 100644
--- a/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy
@@ -21,6 +21,7 @@ package org.apache.slider.client
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.slider.common.params.ActionRegistryArgs
import org.apache.slider.common.params.Arguments
import org.apache.slider.common.params.SliderActions
@@ -44,7 +45,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testNoAction() throws Throwable {
launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"Usage: slider COMMAND",
[])
@@ -53,7 +54,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testUnknownAction() throws Throwable {
launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"not-a-known-action",
["not-a-known-action"])
}
@@ -61,7 +62,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testActionWithoutOptions() throws Throwable {
launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"Usage: slider build <application>",
[SliderActions.ACTION_BUILD])
}
@@ -69,7 +70,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testActionWithoutEnoughArgs() throws Throwable {
launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
ErrorStrings.ERROR_NOT_ENOUGH_ARGUMENTS,
[SliderActions.ACTION_THAW])
}
@@ -77,7 +78,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testActionWithTooManyArgs() throws Throwable {
launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
ErrorStrings.ERROR_TOO_MANY_ARGUMENTS,
[SliderActions.ACTION_HELP,
"hello, world"])
@@ -86,7 +87,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testBadImageArg() throws Throwable {
launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"Unknown option: --image",
[SliderActions.ACTION_HELP,
Arguments.ARG_IMAGE])
@@ -95,7 +96,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testRegistryUsage() throws Throwable {
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"org.apache.slider.core.exceptions.UsageException: Argument --name missing",
[SliderActions.ACTION_REGISTRY])
assert exception instanceof UsageException
@@ -105,7 +106,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testRegistryExportBadUsage1() throws Throwable {
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"Expected a value after parameter --getexp",
[SliderActions.ACTION_REGISTRY,
Arguments.ARG_NAME,
@@ -118,7 +119,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testRegistryExportBadUsage2() throws Throwable {
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"Expected a value after parameter --getexp",
[SliderActions.ACTION_REGISTRY,
Arguments.ARG_NAME,
@@ -132,7 +133,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testRegistryExportBadUsage3() throws Throwable {
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"Usage: registry",
[SliderActions.ACTION_REGISTRY,
Arguments.ARG_NAME,
@@ -147,7 +148,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
@Test
public void testUpgradeUsage() throws Throwable {
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"org.apache.slider.core.exceptions.BadCommandArgumentsException: Not enough arguments for action: upgrade Expected minimum 1 but got 0",
[SliderActions.ACTION_UPGRADE])
assert exception instanceof BadCommandArgumentsException
@@ -158,7 +159,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
public void testUpgradeWithTemplateOptionOnly() throws Throwable {
String appName = "test_hbase"
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"BadCommandArgumentsException: Option --resources must be specified with option --template",
[SliderActions.ACTION_UPGRADE,
appName,
@@ -169,11 +170,17 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
log.info(exception.toString())
}
+ public Configuration createTestConfig() {
+ def configuration = new Configuration()
+ configuration.set(YarnConfiguration.RM_ADDRESS, "127.0.0.1:8032")
+ return configuration
+ }
+
@Test
public void testUpgradeWithResourcesOptionOnly() throws Throwable {
String appName = "test_hbase"
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"BadCommandArgumentsException: Option --template must be specified with option --resources",
[SliderActions.ACTION_UPGRADE,
appName,
@@ -188,7 +195,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
public void testUpgradeWithTemplateResourcesAndContainersOption() throws Throwable {
String appName = "test_hbase"
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"BadCommandArgumentsException: Option --containers cannot be "
+ "specified with --template or --resources",
[SliderActions.ACTION_UPGRADE,
@@ -208,7 +215,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
public void testUpgradeWithTemplateResourcesAndComponentsOption() throws Throwable {
String appName = "test_hbase"
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"BadCommandArgumentsException: Option --components cannot be "
+ "specified with --template or --resources",
[SliderActions.ACTION_UPGRADE,
@@ -228,7 +235,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
public void testCreateAppWithAddonPkgBadArg1() throws Throwable {
//add on package without specifying add on package name
def exception = launchExpectingException(SliderClient,
- new Configuration(),
+ createTestConfig(),
"Expected 2 values after --addon",
[SliderActions.ACTION_CREATE,
"cl1",
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/89fd701b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy
index 5565e6b..c041ce5 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy
@@ -29,8 +29,10 @@ import org.apache.slider.server.appmaster.state.AppStateBindingInfo
class MockAppState extends AppState {
public static final int RM_MAX_RAM = 4096
public static final int RM_MAX_CORES = 64
+
public MockAppState(AbstractClusterServices recordFactory) {
super(recordFactory, new MetricsAndMonitoring());
+ setContainerLimits(1, RM_MAX_RAM, 1, RM_MAX_CORES)
}
long time = 0;
@@ -39,8 +41,7 @@ class MockAppState extends AppState {
* Instance with a mock record factory
*/
public MockAppState() {
- super(new MockClusterServices(), new MetricsAndMonitoring());
- setContainerLimits(1, RM_MAX_RAM, 1, RM_MAX_CORES)
+ this(new MockClusterServices());
}
MockAppState(AppStateBindingInfo bindingInfo) {
[02/12] incubator-slider git commit: SLIDER-967 Use nodemap to build
up location restrictions on AA placement
Posted by st...@apache.org.
SLIDER-967 Use nodemap to build up location restrictions on AA placement
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/2606192a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/2606192a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/2606192a
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 2606192ab0685fc267fcd5f3670e13b93a7a83b6
Parents: 89fd701
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 13:54:15 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 13:54:15 2015 +0000
----------------------------------------------------------------------
.../slider/server/appmaster/state/AppState.java | 15 ++-
.../server/appmaster/state/NodeInstance.java | 74 +++++++++++++--
.../slider/server/appmaster/state/NodeMap.java | 44 ++++++++-
.../appmaster/state/OutstandingRequest.java | 67 +++----------
.../state/OutstandingRequestTracker.java | 23 ++---
.../server/appmaster/state/RoleHistory.java | 70 +++++++-------
.../appmaster/state/RoleHostnamePair.java | 75 +++++++++++++++
.../server/appmaster/web/SliderAMWebApp.java | 2 +-
.../model/history/TestRoleHistoryAA.groovy | 98 +++++++++++++++++++-
.../TestRoleHistoryContainerEvents.groovy | 5 +-
.../appmaster/model/mock/MockNodeReport.groovy | 34 +++++++
11 files changed, 376 insertions(+), 131 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index f74fe98..063a7fc 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -1427,7 +1427,11 @@ public class AppState {
* @param updatedNodes updated nodes
*/
public synchronized void onNodesUpdated(List<NodeReport> updatedNodes) {
- roleHistory.onNodesUpdated(updatedNodes);
+ boolean changed = roleHistory.onNodesUpdated(updatedNodes);
+ if (changed) {
+ //TODO
+ log.error("TODO: cancel AA requests and re-review");
+ }
}
/**
@@ -1923,13 +1927,18 @@ public class AppState {
if (isAA) {
// build one only if there is none outstanding
- if (role.getPendingAntiAffineRequests() == 0) {
+ if (role.getPendingAntiAffineRequests() == 0
+ && roleHistory.canPlaceAANodes()) {
log.info("Starting an anti-affine request sequence for {} nodes", delta);
// log the number outstanding
role.incPendingAntiAffineRequests(delta - 1);
addContainerRequest(operations, createContainerRequest(role));
} else {
- log.info("Adding {} more anti-affine requests", delta);
+ if (roleHistory.canPlaceAANodes()) {
+ log.info("Adding {} more anti-affine requests", delta);
+ } else {
+ log.warn("Awaiting node map before generating node requests");
+ }
role.incPendingAntiAffineRequests(delta);
}
} else {
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
index 7fc912d..b805ffb 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
@@ -20,16 +20,15 @@ package org.apache.slider.server.appmaster.state;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
-import org.apache.slider.api.types.NodeEntryInformation;
import org.apache.slider.api.types.NodeInformation;
import org.apache.slider.common.tools.Comparators;
-import org.apache.slider.common.tools.SliderUtils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.ListIterator;
+import java.util.Set;
/**
* A node instance -stores information about a node in the cluster.
@@ -46,6 +45,9 @@ public class NodeInstance {
*/
private NodeState nodeState = NodeState.RUNNING;
+ /**
+ * Last node report. If null: none
+ */
private NodeReport nodeReport = null;
/**
@@ -53,6 +55,17 @@ public class NodeInstance {
*/
private long nodeStateUpdateTime = 0;
+ /**
+ * Node labels.
+ *
+ * IMPORTANT: we assume that there is one label/node, which is the policy
+ * for Hadoop as of November 2015
+ */
+ private String nodeLabels = "";
+
+ /**
+ * The list of node entries of specific roles
+ */
private final List<NodeEntry> nodeEntries;
/**
@@ -64,18 +77,41 @@ public class NodeInstance {
nodeEntries = new ArrayList<>(roles);
}
-
/**
- * Update the node status
+ * Update the node status.
+ * The return code is true if the node state changed enough to
+ * trigger a re-evaluation of pending requests. That is, either a node
+ * became available when it was previously not, or the label changed
+ * on an available node.
+ *
+ * Transitions of a node from live to dead aren't treated as significant,
+ * nor label changes on a dead node.
+ *
* @param report latest node report
- * @return true if the node state changed
+ * @return true if the node state changed enough for a request evaluation.
*/
public synchronized boolean updateNode(NodeReport report) {
+ nodeStateUpdateTime = report.getLastHealthReportTime();
nodeReport = report;
NodeState oldState = nodeState;
+ boolean oldStateUnusable = oldState.isUnusable();
nodeState = report.getNodeState();
- nodeStateUpdateTime = report.getLastHealthReportTime();
- return nodeState != oldState;
+ boolean newUsable = !nodeState.isUnusable();
+ boolean nodeNowAvailable = oldStateUnusable && newUsable;
+ String labels = this.nodeLabels;
+ Set<String> newlabels = report.getNodeLabels();
+ if (newlabels != null && !newlabels.isEmpty()) {
+ nodeLabels = newlabels.iterator().next().trim();
+ } else {
+ nodeLabels = "";
+ }
+ return nodeNowAvailable
+ || newUsable && !this.nodeLabels.equals(labels);
+ }
+
+
+ public String getNodeLabels() {
+ return nodeLabels;
}
/**
@@ -130,6 +166,14 @@ public class NodeInstance {
}
/**
+ * Is the node considered online
+ * @return true if the node state is not unusable
+ */
+ public boolean isOnline() {
+ return !nodeState.isUnusable();
+ }
+
+ /**
* Query for a node being considered unreliable
* @param role role key
* @param threshold threshold above which a node is considered unreliable
@@ -137,7 +181,6 @@ public class NodeInstance {
*/
public boolean isConsideredUnreliable(int role, int threshold) {
NodeEntry entry = get(role);
-
return entry != null && entry.getFailedRecently() > threshold;
}
@@ -258,10 +301,10 @@ public class NodeInstance {
// null-handling state constructor
info.state = "" + nodeState;
info.lastUpdated = nodeStateUpdateTime;
+ info.labels = nodeLabels;
if (nodeReport != null) {
info.httpAddress = nodeReport.getHttpAddress();
info.rackName = nodeReport.getRackName();
- info.labels = SliderUtils.join(nodeReport.getNodeLabels(), ", ", false);
info.healthReport = nodeReport.getHealthReport();
}
info.entries = new ArrayList<>(nodeEntries.size());
@@ -272,6 +315,19 @@ public class NodeInstance {
}
/**
+ * Is this node instance a suitable candidate for the specific role?
+ * @param role role ID
+ * @param label label which must match, or "" for no label checks
+ * @return true if the node has space for this role, is running and the labels
+ * match.
+ */
+ public boolean canHost(int role, String label) {
+ return isOnline()
+ && (label.isEmpty() || label.equals(nodeLabels)) // label match
+ && (get(role) == null || get(role).isAvailable()); // no live role
+ }
+
+ /**
* A comparator for sorting entries where the node is preferred over another.
*
* The exact algorithm may change: current policy is "most recent first", so sorted
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
index b631057..2887c9e 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
@@ -92,20 +92,24 @@ public class NodeMap extends HashMap<String, NodeInstance> {
}
}
-
/**
- * Update the node state
+ * Update the node state. Return true if the node state changed: either by
+ * being created, or by changing its internal state as defined
+ * by {@link NodeInstance#updateNode(NodeReport)}.
+ *
* @param hostname host name
* @param report latest node report
- * @return the updated node.
+ * @return true if the node state changed enough for a request evaluation.
*/
public boolean updateNode(String hostname, NodeReport report) {
- return getOrCreate(hostname).updateNode(report);
+ boolean nodeExisted = get(hostname) != null;
+ boolean updated = getOrCreate(hostname).updateNode(report);
+ return updated || !nodeExisted;
}
/**
* Clone point
- * @return
+ * @return a shallow clone
*/
@Override
public Object clone() {
@@ -123,4 +127,34 @@ public class NodeMap extends HashMap<String, NodeInstance> {
put(node.hostname, node);
}
}
+
+ /**
+ * Test helper: build or update a cluster from a list of node reports
+ * @param reports the list of reports
+ * @return true if this has been considered to have changed the cluster
+ */
+ @VisibleForTesting
+ public boolean buildOrUpdate(List<NodeReport> reports) {
+ boolean updated = false;
+ for (NodeReport report : reports) {
+ updated |= getOrCreate(report.getNodeId().getHost()).updateNode(report);
+ }
+ return updated;
+ }
+
+ /**
+ * Scan the current node map for all nodes capable of hosting an instance
+ * @param role role ID
+ * @param label label which must match, or "" for no label checks
+ * @return a list of node instances matching the criteria.
+ */
+ public List<NodeInstance> findNodesForRole(int role, String label) {
+ List<NodeInstance> nodes = new ArrayList<>(size());
+ for (NodeInstance instance : values()) {
+ if (instance.canHost(role, label)) {
+ nodes.add(instance);
+ }
+ }
+ return nodes;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
index 38bc96f..a9d4b52 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
@@ -43,24 +43,14 @@ import java.util.List;
* instance constructed with (role, hostname) can be used to look up
* a complete request instance in the {@link OutstandingRequestTracker} map
*/
-public final class OutstandingRequest {
+public final class OutstandingRequest extends RoleHostnamePair {
protected static final Logger log =
LoggerFactory.getLogger(OutstandingRequest.class);
/**
- * requested role
- */
- public final int roleId;
-
- /**
* Node the request is for -may be null
*/
public final NodeInstance node;
-
- /**
- * hostname -will be null if node==null
- */
- public final String hostname;
/**
* Optional label. This is cached as the request option (explicit-location + label) is forbidden,
@@ -111,9 +101,8 @@ public final class OutstandingRequest {
*/
public OutstandingRequest(int roleId,
NodeInstance node) {
- this.roleId = roleId;
+ super(roleId, node != null ? node.hostname : null);
this.node = node;
- this.hostname = node != null ? node.hostname : null;
}
/**
@@ -125,9 +114,8 @@ public final class OutstandingRequest {
* @param hostname hostname
*/
public OutstandingRequest(int roleId, String hostname) {
+ super(roleId, hostname);
this.node = null;
- this.roleId = roleId;
- this.hostname = hostname;
}
/**
@@ -301,52 +289,13 @@ public final class OutstandingRequest {
return issuedRequest != null && issuedRequest.getCapability().equals(resource);
}
- /**
- * Equality is on hostname and role
- * @param o other
- * @return true on a match
- */
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- OutstandingRequest request = (OutstandingRequest) o;
-
- if (roleId != request.roleId) {
- return false;
- }
- if (hostname != null
- ? !hostname.equals(request.hostname)
- : request.hostname != null) {
- return false;
- }
- return true;
- }
-
- /**
- * hash on hostname and role
- * @return hash code
- */
- @Override
- public int hashCode() {
- int result = roleId;
- result = 31 * result + (hostname != null ? hostname.hashCode() : 0);
- return result;
- }
-
@Override
public String toString() {
int requestRoleId = ContainerPriority.extractRole(getPriority());
boolean requestHasLocation = ContainerPriority.hasLocation(getPriority());
final StringBuilder sb = new StringBuilder("OutstandingRequest{");
- sb.append("roleId=").append(this.roleId);
+ sb.append(super.toString());
sb.append(", node=").append(node);
- sb.append(", hostname='").append(hostname).append('\'');
sb.append(", hasLocation=").append(requestHasLocation);
sb.append(", requestedTimeMillis=").append(requestedTimeMillis);
sb.append(", mayEscalate=").append(mayEscalate);
@@ -367,7 +316,6 @@ public final class OutstandingRequest {
return new CancelSingleRequest(issuedRequest);
}
-
/**
* Valid if a node label expression specified on container request is valid or
* not. Mimics the logic in AMRMClientImpl, so can be used for preflight checking
@@ -410,5 +358,12 @@ public final class OutstandingRequest {
}
}
+ /**
+ * Create a new role/hostname pair for indexing.
+ * @return a new index.
+ */
+ public RoleHostnamePair getIndex() {
+ return new RoleHostnamePair(roleId, hostname);
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
index a791826..ecdc07a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
@@ -35,10 +35,12 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
+import java.util.Set;
/**
* Tracks outstanding requests made with a specific placement option.
@@ -62,8 +64,7 @@ public class OutstandingRequestTracker {
*/
private final List<AbstractRMOperation> NO_REQUESTS = new ArrayList<>(0);
- private Map<OutstandingRequest, OutstandingRequest> placedRequests =
- new HashMap<>();
+ private Map<RoleHostnamePair, OutstandingRequest> placedRequests = new HashMap<>();
/**
* List of open requests; no specific details on them.
@@ -82,10 +83,9 @@ public class OutstandingRequestTracker {
* @return a new request
*/
public synchronized OutstandingRequest newRequest(NodeInstance instance, int role) {
- OutstandingRequest request =
- new OutstandingRequest(role, instance);
+ OutstandingRequest request = new OutstandingRequest(role, instance);
if (request.isLocated()) {
- placedRequests.put(request, request);
+ placedRequests.put(request.getIndex(), request);
} else {
openRequests.add(request);
}
@@ -101,7 +101,7 @@ public class OutstandingRequestTracker {
@VisibleForTesting
public synchronized OutstandingRequest lookupPlacedRequest(int role, String hostname) {
Preconditions.checkArgument(hostname != null, "null hostname");
- return placedRequests.get(new OutstandingRequest(role, hostname));
+ return placedRequests.get(new RoleHostnamePair(role, hostname));
}
/**
@@ -294,12 +294,12 @@ public class OutstandingRequestTracker {
*/
public synchronized List<NodeInstance> resetOutstandingRequests(int role) {
List<NodeInstance> hosts = new ArrayList<>();
- Iterator<Map.Entry<OutstandingRequest,OutstandingRequest>> iterator =
+ Iterator<Map.Entry<RoleHostnamePair, OutstandingRequest>> iterator =
placedRequests.entrySet().iterator();
while (iterator.hasNext()) {
- Map.Entry<OutstandingRequest, OutstandingRequest> next =
+ Map.Entry<RoleHostnamePair, OutstandingRequest> next =
iterator.next();
- OutstandingRequest request = next.getKey();
+ OutstandingRequest request = next.getValue();
if (request.roleId == role) {
iterator.remove();
request.completed();
@@ -390,9 +390,10 @@ public class OutstandingRequestTracker {
*/
public synchronized List<OutstandingRequest> extractPlacedRequestsForRole(int roleId, int count) {
List<OutstandingRequest> results = new ArrayList<>();
- Iterator<OutstandingRequest> iterator = placedRequests.keySet().iterator();
+ Iterator<Map.Entry<RoleHostnamePair, OutstandingRequest>>
+ iterator = placedRequests.entrySet().iterator();
while (iterator.hasNext() && count > 0) {
- OutstandingRequest request = iterator.next();
+ OutstandingRequest request = iterator.next().getValue();
if (request.roleId == roleId) {
results.add(request);
count--;
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index f8271a6..df1f4e1 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -109,12 +109,6 @@ public class RoleHistory {
private Map<Integer, LinkedList<NodeInstance>> recentNodes;
/**
- * Track the failed nodes. Currently used to make wiser decision of container
- * ask with/without locality. Has other potential uses as well.
- */
- private Set<String> failedNodes = new HashSet<>();
-
- /**
* Instantiate
* @param roles initial role list
* @param recordFactory yarn record factory
@@ -137,7 +131,6 @@ public class RoleHistory {
protected synchronized void reset() throws BadConfigException {
nodemap = new NodeMap(roleSize);
- failedNodes = new HashSet<>();
resetAvailableNodeLists();
outstandingRequests = new OutstandingRequestTracker();
}
@@ -578,7 +571,8 @@ public class RoleHistory {
NodeInstance candidate = targets.get(i);
if (candidate.getActiveRoleInstances(roleId) == 0) {
// no active instances: check failure statistics
- if (strictPlacement || !candidate.exceedsFailureThreshold(role)) {
+ if (strictPlacement
+ || (candidate.isOnline() && !candidate.exceedsFailureThreshold(role))) {
targets.remove(i);
// exit criteria for loop is now met
nodeInstance = candidate;
@@ -779,6 +773,16 @@ public class RoleHistory {
}
/**
+ * Does the RoleHistory have enough information about the YARN cluster
+ * to start placing AA requests? That is: has it the node map and
+ * any label information needed?
+ * @return true if the caller can start requesting AA nodes
+ */
+ public boolean canPlaceAANodes() {
+ return nodeUpdateReceived.get();
+ }
+
+ /**
* Get the last time the nodes were updated from YARN
* @return the update time or zero if never updated.
*/
@@ -788,33 +792,35 @@ public class RoleHistory {
/**
* Update the nodemap based on the node state
- *
+ *
* @param updatedNodes list of updated nodes
+ * @return true if a review should be triggered.
*/
- public synchronized void onNodesUpdated(List<NodeReport> updatedNodes) {
+ public synchronized boolean onNodesUpdated(List<NodeReport> updatedNodes) {
log.debug("Updating {} nodes", updatedNodes.size());
nodesUpdatedTime.set(now());
nodeUpdateReceived.set(true);
+ int printed = 0;
+ boolean triggerReview = false;
for (NodeReport updatedNode : updatedNodes) {
String hostname = updatedNode.getNodeId() == null
- ? null
- : updatedNode.getNodeId().getHost();
+ ? ""
+ : updatedNode.getNodeId().getHost();
NodeState nodeState = updatedNode.getNodeState();
- if (hostname == null || nodeState == null) {
+ if (hostname.isEmpty() || nodeState == null) {
+ log.warn("Ignoring incomplete update");
continue;
}
- log.debug("host {} is in state {}", hostname, nodeState);
+ if (log.isDebugEnabled() && printed++ < 10) {
+ // log the first few, but avoid overloading the logs for a full cluster
+ // update
+ log.debug("Node \"{}\" is in state {}", hostname, nodeState);
+ }
// update the node; this also creates an instance if needed
boolean updated = nodemap.updateNode(hostname, updatedNode);
- if (updated) {
- if (nodeState.isUnusable()) {
- log.info("Failed node {} state {}", hostname, nodeState);
- failedNodes.add(hostname);
- } else {
- failedNodes.remove(hostname);
- }
- }
+ triggerReview |= updated;
}
+ return triggerReview;
}
/**
@@ -852,7 +858,7 @@ public class RoleHistory {
/**
* Mark a container finished; if it was released then that is treated
- * differently. history is touch()ed
+ * differently. history is {@code touch()}-ed
*
*
* @param container completed container
@@ -917,9 +923,6 @@ public class RoleHistory {
for (NodeInstance node : nodemap.values()) {
log.info(node.toFullString());
}
-
- log.info("Failed nodes: {}",
- SliderUtils.joinWithInnerSeparator(" ", failedNodes));
}
/**
@@ -963,17 +966,6 @@ public class RoleHistory {
}
/**
- * Get a clone of the failedNodes
- *
- * @return the list
- */
- public List<String> cloneFailedNodes() {
- List<String> lst = new ArrayList<>();
- lst.addAll(failedNodes);
- return lst;
- }
-
- /**
* Escalate operation as triggered by external timer.
* @return a (usually empty) list of cancel/request operations.
*/
@@ -983,8 +975,8 @@ public class RoleHistory {
/**
* Build the list of requests to cancel from the outstanding list.
- * @param role
- * @param toCancel
+ * @param role role
+ * @param toCancel number to cancel
* @return a list of cancellable operations.
*/
public synchronized List<AbstractRMOperation> cancelRequestsForRole(RoleStatus role, int toCancel) {
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHostnamePair.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHostnamePair.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHostnamePair.java
new file mode 100644
index 0000000..920887a
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHostnamePair.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.state;
+
+import java.util.Objects;
+
+public class RoleHostnamePair {
+
+ /**
+ * requested role
+ */
+ public final int roleId;
+
+ /**
+ * hostname -will be null if node==null
+ */
+ public final String hostname;
+
+ public RoleHostnamePair(int roleId, String hostname) {
+ this.roleId = roleId;
+ this.hostname = hostname;
+ }
+
+ public int getRoleId() {
+ return roleId;
+ }
+
+ public String getHostname() {
+ return hostname;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof RoleHostnamePair)) {
+ return false;
+ }
+ RoleHostnamePair that = (RoleHostnamePair) o;
+ return Objects.equals(roleId, that.roleId) &&
+ Objects.equals(hostname, that.hostname);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(roleId, hostname);
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder(
+ "RoleHostnamePair{");
+ sb.append("roleId=").append(roleId);
+ sb.append(", hostname='").append(hostname).append('\'');
+ sb.append('}');
+ return sb.toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
index 84f0eba..7ecc00c 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
@@ -92,7 +92,7 @@ public class SliderAMWebApp extends WebApp {
String regex = "(?!/ws)";
serveRegex(regex).with(SliderDefaultWrapperServlet.class);
- Map<String, String> params = new HashMap<String, String>();
+ Map<String, String> params = new HashMap<>();
params.put(ResourceConfig.FEATURE_IMPLICIT_VIEWABLES, "true");
params.put(ServletContainer.FEATURE_FILTER_FORWARD_ON_404, "true");
params.put(ResourceConfig.FEATURE_XMLROOTELEMENT_PROCESSING, "true");
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
index 36b9d66..f99326f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
@@ -18,16 +18,106 @@
package org.apache.slider.server.appmaster.model.history
-import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.NodeReport
+import org.apache.hadoop.yarn.api.records.NodeState
+import org.apache.slider.server.appmaster.model.mock.MockNodeReport
+import org.apache.slider.server.appmaster.state.NodeEntry
+import org.apache.slider.server.appmaster.state.NodeInstance
+import org.apache.slider.server.appmaster.state.NodeMap
+import org.apache.slider.test.SliderTestBase
import org.junit.Test
/**
* Test anti-affine
*/
-class TestRoleHistoryAA extends BaseMockAppStateTest {
+//@CompileStatic
+@Slf4j
+class TestRoleHistoryAA extends SliderTestBase {
+
+ List<String> hostnames = ["one", "two", "three"]
+ NodeMap nodeMap, gpuNodeMap
+
+ @Override
+ void setup() {
+ super.setup()
+ nodeMap = createNodeMap(hostnames, NodeState.RUNNING)
+ gpuNodeMap = createNodeMap(hostnames, NodeState.RUNNING, "GPU")
+
+ }
+
+ @Test
+ public void testFindNodesInFullCluster() throws Throwable {
+ // all three will surface at first
+ assertResultSize(3, nodeMap.findNodesForRole(1, ""))
+ }
+
+ @Test
+ public void testFindNodesInUnhealthyCluster() throws Throwable {
+ // mark one node unhealthy: only the other two should surface
+ nodeMap.get("one").updateNode(new MockNodeReport("one",NodeState.UNHEALTHY))
+ assertResultSize(2, nodeMap.findNodesForRole(1, ""))
+ }
+
+ @Test
+ public void testFindNoNodesWrongLabel() throws Throwable {
+ // no node carries the GPU label, so none should surface
+ assertResultSize(0, nodeMap.findNodesForRole(1, "GPU"))
+ }
+
+ @Test
+ public void testFindNoNodesRightLabel() throws Throwable {
+ // all three will surface at first
+ assertResultSize(3, gpuNodeMap.findNodesForRole(1, "GPU"))
+ }
@Test
- public void test() throws Throwable {
-
+ public void testFindNoNodesNoLabel() throws Throwable {
+ // all three will surface at first
+ assertResultSize(3, gpuNodeMap.findNodesForRole(1, ""))
+ }
+
+ @Test
+ public void testFindNoNodesClusterRequested() throws Throwable {
+ // every node has an outstanding request for the role, so none is available
+ applyToNodeEntries(nodeMap) {
+ NodeEntry it -> it.request()
+ }
+ assertResultSize(0, nodeMap.findNodesForRole(1, ""))
+ }
+
+ @Test
+ public void testFindNoNodesClusterBusy() throws Throwable {
+ // every node entry is marked as requested, so none is available
+ applyToNodeEntries(nodeMap) {
+ NodeEntry it -> it.request()
+ }
+ assertResultSize(0, nodeMap.findNodesForRole(1, ""))
+ }
+
+ def assertResultSize(int size, List<NodeInstance> list) {
+ if (list.size() != size) {
+ list.each { log.error(it.toFullString())}
+ }
+ assert size == list.size()
+ }
+
+ def applyToNodeEntries(Collection<NodeInstance> list, Closure cl) {
+ list.each { it -> cl(it.getOrCreate(1)) }
+ }
+
+ def applyToNodeEntries(NodeMap nodeMap, Closure cl) {
+ applyToNodeEntries(nodeMap.values(), cl)
+ }
+
+ def NodeMap createNodeMap(List<NodeReport> nodeReports) {
+ NodeMap nodeMap = new NodeMap(1)
+ nodeMap.buildOrUpdate(nodeReports)
+ nodeMap
+ }
+
+ def NodeMap createNodeMap(List<String> hosts, NodeState state,
+ String label = "") {
+ createNodeMap(MockNodeReport.createInstances(hosts, state, label))
}
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
index d9cfddb..c8a82bd 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
@@ -415,7 +415,7 @@ class TestRoleHistoryContainerEvents extends BaseMockAppStateTest {
// as even unused nodes are added to the list, we expect the map size to be >1
assert startSize <= endSize
assert nodemap[hostname] != null
- assert roleHistory.cloneFailedNodes().contains(hostname)
+ assert !nodemap[hostname].online
// add a failure of a node we've never heard of
def newhost = "newhost"
@@ -428,9 +428,8 @@ class TestRoleHistoryContainerEvents extends BaseMockAppStateTest {
roleHistory.onNodesUpdated(nodesUpdated)
def nodemap2 = roleHistory.cloneNodemap()
- assert nodemap2.size() > endSize
- assert roleHistory.cloneFailedNodes().contains(newhost)
assert nodemap2[newhost]
+ assert !nodemap2[newhost].online
}
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy
index 1c7a816..43eef3e 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy
@@ -18,6 +18,7 @@
package org.apache.slider.server.appmaster.model.mock
+import groovy.transform.CompileStatic
import org.apache.hadoop.yarn.api.records.NodeId
import org.apache.hadoop.yarn.api.records.NodeReport
import org.apache.hadoop.yarn.api.records.NodeState
@@ -26,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.Resource
/**
* Node report for testing
*/
+@CompileStatic
class MockNodeReport extends NodeReport {
NodeId nodeId;
NodeState nodeState;
@@ -37,4 +39,36 @@ class MockNodeReport extends NodeReport {
String healthReport;
long lastHealthReportTime;
Set<String> nodeLabels;
+
+ MockNodeReport() {
+ }
+
+ /**
+ * Create a single instance
+ * @param hostname
+ * @param nodeState
+ * @param label
+ */
+ MockNodeReport(String hostname, NodeState nodeState, String label ="") {
+ nodeId = NodeId.newInstance(hostname, 80)
+ this.nodeState = nodeState
+ this.httpAddress = "http$hostname:80"
+ this.nodeLabels = new HashSet<>()
+ nodeLabels.add(label)
+ }
+
+ /**
+ * Create a list of instances -one for each hostname
+ * @param hostnames hosts
+ * @param nodeState state of all of them
+ * @param label label for all of them
+ * @return a list of node reports, one per hostname
+ */
+ static List<MockNodeReport> createInstances(
+ List<String> hostnames,
+ NodeState nodeState = NodeState.RUNNING,
+ String label = "") {
+ hostnames.collect { String name ->
+ new MockNodeReport(name, nodeState, label)}
+ }
}
[09/12] incubator-slider git commit: SLIDER-982 chaos monkey to not
log @ info if its not actually active
Posted by st...@apache.org.
SLIDER-982 chaos monkey to not log @ info if its not actually active
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/ca6f9cee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/ca6f9cee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/ca6f9cee
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: ca6f9cee09b4b6c7b6dee83bdd06268e8ddd4d50
Parents: c4e7329
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 13:58:50 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 14:01:31 2015 +0000
----------------------------------------------------------------------
.../slider/server/appmaster/SliderAppMaster.java | 2 +-
.../server/appmaster/monkey/ChaosKillContainer.java | 16 +++++++---------
2 files changed, 8 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ca6f9cee/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index df91d7f..3d062b5 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -2206,7 +2206,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
internals.getOptionBool(InternalKeys.CHAOS_MONKEY_ENABLED,
InternalKeys.DEFAULT_CHAOS_MONKEY_ENABLED);
if (!enabled) {
- log.info("Chaos monkey disabled");
+ log.debug("Chaos monkey disabled");
return false;
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ca6f9cee/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
index a5cdfc6..ae38e4c 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
@@ -72,15 +72,13 @@ public class ChaosKillContainer implements ChaosTarget {
}
}
int size = liveContainers.size();
- if (size <= 0) {
- log.info("No containers to kill");
- return;
- }
- int target = random.nextInt(size);
- RoleInstance roleInstance = liveContainers.get(target);
- log.info("Killing {}", roleInstance);
+ if (size > 0) {
+ int target = random.nextInt(size);
+ RoleInstance roleInstance = liveContainers.get(target);
+ log.info("Killing {}", roleInstance);
- queues.schedule(new ActionKillContainer(roleInstance.getId(),
- DELAY, TimeUnit.MILLISECONDS, operationHandler));
+ queues.schedule(new ActionKillContainer(roleInstance.getId(),
+ DELAY, TimeUnit.MILLISECONDS, operationHandler));
+ }
}
}
[03/12] incubator-slider git commit: SLIDER-960 switch to groovy
2.4.4 (the ASF edition), with the faster java7 build
Posted by st...@apache.org.
SLIDER-960 switch to groovy 2.4.4 (the ASF edition), with the faster java7 build
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/618782a9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/618782a9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/618782a9
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 618782a9b66e0ae35e79d07888fec45d19ccb421
Parents: fed5d03
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 13:57:14 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 13:57:14 2015 +0000
----------------------------------------------------------------------
pom.xml | 7 +++++++
slider-core/pom.xml | 1 +
2 files changed, 8 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/618782a9/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3943ea9..866c448 100644
--- a/pom.xml
+++ b/pom.xml
@@ -511,6 +511,13 @@
</dependency>
<dependency>
+ <groupId>org.codehaus.groovy</groupId>
+ <artifactId>groovy-all</artifactId>
+ <classifier>indy</classifier>
+ <version>${groovy.version}</version>
+ </dependency>
+
+ <dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
<version>${jcommander.version}</version>
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/618782a9/slider-core/pom.xml
----------------------------------------------------------------------
diff --git a/slider-core/pom.xml b/slider-core/pom.xml
index 704fa06..c7e8933 100644
--- a/slider-core/pom.xml
+++ b/slider-core/pom.xml
@@ -225,6 +225,7 @@
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-all</artifactId>
+ <classifier>indy</classifier>
<scope>test</scope>
</dependency>
[12/12] incubator-slider git commit: SLIDER-967 AA placement with
nodemap updates working
Posted by st...@apache.org.
SLIDER-967 AA placement with nodemap updates working
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/5a61b4cd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/5a61b4cd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/5a61b4cd
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 5a61b4cd8189ae02eb9eaeb8ffdb25604dcc4376
Parents: 6b13042
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 18:15:07 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 18:15:07 2015 +0000
----------------------------------------------------------------------
.../server/appmaster/SliderAppMaster.java | 13 ++-
.../slider/server/appmaster/state/AppState.java | 51 ++++++++++--
.../state/OutstandingRequestTracker.java | 8 +-
.../server/appmaster/state/RoleHistory.java | 5 --
.../server/appmaster/state/RoleStatus.java | 55 ++++++++-----
.../appstate/TestMockAppStateAAPlacement.groovy | 85 +++++++++++++++++---
.../model/history/TestRoleHistoryAA.groovy | 4 -
.../model/mock/BaseMockAppStateTest.groovy | 21 ++++-
8 files changed, 183 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index b54ea6c..eb7b26a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -1851,10 +1851,15 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
LOG_YARN.info("onNodesUpdated({})", updatedNodes.size());
log.info("Updated nodes {}", updatedNodes);
// Check if any nodes are lost or revived and update state accordingly
- List<AbstractRMOperation> operations = appState.onNodesUpdated(updatedNodes);
- execute(operations);
- // if there were any operations, trigger a review
- reviewRequestAndReleaseNodes("nodes updated");
+
+ AppState.NodeUpdatedOutcome outcome = appState.onNodesUpdated(updatedNodes);
+ if (!outcome.operations.isEmpty()) {
+ execute(outcome.operations);
+ }
+ // trigger a review if the cluster changed
+ if (outcome.clusterChanged) {
+ reviewRequestAndReleaseNodes("nodes updated");
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index 0c66e25..6f38eb5 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -1222,11 +1222,11 @@ public class AppState {
* @return the container request to submit or null if there is none
*/
private AMRMClient.ContainerRequest createContainerRequest(RoleStatus role) {
- incrementRequestCount(role);
OutstandingRequest request = roleHistory.requestContainerForRole(role);
if (request == null) {
return null;
}
+ incrementRequestCount(role);
if (role.isAntiAffinePlacement()) {
role.setOutstandingAArequest(request);
}
@@ -1428,16 +1428,31 @@ public class AppState {
* Handle node update from the RM. This syncs up the node map with the RM's view
* @param updatedNodes updated nodes
*/
- public synchronized List<AbstractRMOperation> onNodesUpdated(List<NodeReport> updatedNodes) {
+ public synchronized NodeUpdatedOutcome onNodesUpdated(List<NodeReport> updatedNodes) {
boolean changed = roleHistory.onNodesUpdated(updatedNodes);
if (changed) {
- log.error("TODO: cancel AA requests and re-review");
- return cancelOutstandingAARequests();
+ log.info("YARN cluster changed —cancelling current AA requests");
+ List<AbstractRMOperation> operations = cancelOutstandingAARequests();
+ log.debug("Created {} cancel requests", operations.size());
+ return new NodeUpdatedOutcome(true, operations);
}
- return new ArrayList<>(0);
+ return new NodeUpdatedOutcome(false, new ArrayList<AbstractRMOperation>(0));
}
/**
+ * Return value of the {@link #onNodesUpdated(List)} call.
+ */
+ public static class NodeUpdatedOutcome {
+ public final boolean clusterChanged;
+ public final List<AbstractRMOperation> operations;
+
+ public NodeUpdatedOutcome(boolean clusterChanged,
+ List<AbstractRMOperation> operations) {
+ this.clusterChanged = clusterChanged;
+ this.operations = operations;
+ }
+ }
+ /**
* Is a role short lived by the threshold set for this application
* @param instance instance
* @return true if the instance is considered short lived
@@ -1885,13 +1900,17 @@ public class AppState {
}
/**
- * Escalate operation as triggered by external timer.
+ * Cancel any outstanding AA Requests, building up the list of ops to
+ * cancel, removing them from RoleHistory structures and the RoleStatus
+ * entries.
* @return a (usually empty) list of cancel/request operations.
*/
public synchronized List<AbstractRMOperation> cancelOutstandingAARequests() {
+ // get the list of cancel operations
List<AbstractRMOperation> operations = roleHistory.cancelOutstandingAARequests();
for (RoleStatus roleStatus : roleStatusMap.values()) {
- if (roleStatus.isAntiAffinePlacement()) {
+ if (roleStatus.isAARequestOutstanding()) {
+ log.info("Cancelling outstanding AA request for {}", roleStatus);
roleStatus.cancelOutstandingAARequest();
}
}
@@ -2225,6 +2244,9 @@ public class AppState {
log.info("Asking for next container for AA role {}", roleName);
role.decPendingAntiAffineRequests();
addContainerRequest(operations, createContainerRequest(role));
+ log.debug("Current AA role status {}", role);
+ } else {
+ log.info("AA request sequence completed for role {}", role);
}
}
@@ -2310,4 +2332,19 @@ public class AppState {
// now pretend it has just started
innerOnNodeManagerContainerStarted(cid);
}
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("AppState{");
+ sb.append("applicationLive=").append(applicationLive);
+ sb.append(", live nodes=").append(liveNodes.size());
+ sb.append(", startedContainers=").append(startedContainers);
+ sb.append(", startFailedContainerCount=").append(startFailedContainerCount);
+ sb.append(", surplusContainers=").append(surplusContainers);
+ sb.append(", failedContainerCount=").append(failedContainerCount);
+ sb.append(", outstandingContainerRequests=")
+ .append(outstandingContainerRequests);
+ sb.append('}');
+ return sb.toString();
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
index 4209449..66d201f 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
@@ -390,6 +390,7 @@ public class OutstandingRequestTracker {
@SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
public synchronized List<AbstractRMOperation> cancelOutstandingAARequests() {
+ log.debug("Looking for AA request to cancel");
List<AbstractRMOperation> operations = new ArrayList<>();
// first, all placed requests
@@ -404,15 +405,18 @@ public class OutstandingRequestTracker {
}
}
// second, all open requests
- for (OutstandingRequest outstandingRequest : openRequests) {
+ ListIterator<OutstandingRequest> orit = openRequests.listIterator();
+ while (orit.hasNext()) {
+ OutstandingRequest outstandingRequest = orit.next();
synchronized (outstandingRequest) {
if (outstandingRequest.isAntiAffine()) {
// anti-affine request: cancel it
operations.add(outstandingRequest.createCancelOperation());
- openRequests.remove(outstandingRequest);
+ orit.remove();
}
}
}
+ log.info("Cancelling {} outstanding AA requests", operations.size());
return operations;
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index d7e6050..00b5226 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -1030,11 +1030,6 @@ public class RoleHistory {
List<OutstandingRequest> requests =
outstandingRequests.extractOpenRequestsForRole(roleId, toCancel);
- if (role.isAntiAffinePlacement()) {
- // TODO: AA
- // AA placement, so clear the role info
- role.cancelOutstandingAARequest();
- }
// are there any left?
int remaining = toCancel - requests.size();
// ask for some placed nodes
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
index a14a84b..b530d18 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
@@ -171,7 +171,7 @@ public final class RoleStatus implements Cloneable {
public void cancel(long count) {
requested.decToFloor(count);
}
-
+
public void decRequested() {
cancel(1);
}
@@ -334,8 +334,11 @@ public final class RoleStatus implements Cloneable {
* if there are no outstanding requests.
*/
public void cancelOutstandingAARequest() {
- setOutstandingAArequest(null);
- setPendingAntiAffineRequests(0);
+ if (outstandingAArequest != null) {
+ setOutstandingAArequest(null);
+ setPendingAntiAffineRequests(0);
+ decRequested();
+ }
}
/**
@@ -366,25 +369,33 @@ public final class RoleStatus implements Cloneable {
}
@Override
- public synchronized String toString() {
- return "RoleStatus{" +
- "name='" + name + '\'' +
- ", key=" + key +
- ", desired=" + desired +
- ", actual=" + actual +
- ", requested=" + requested +
- ", releasing=" + releasing +
- ", pendingAntiAffineRequests=" + pendingAntiAffineRequests +
- ", failed=" + failed +
- ", failed recently=" + failedRecently.get() +
- ", node failed=" + nodeFailed.get() +
- ", pre-empted=" + preempted.get() +
- ", started=" + started +
- ", startFailed=" + startFailed +
- ", completed=" + completed +
- ", failureMessage='" + failureMessage + '\'' +
- ", providerRole=" + providerRole +
- '}';
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("RoleStatus{");
+ sb.append("name='").append(name).append('\'');
+ sb.append(", key=").append(key);
+ sb.append(", desired=").append(desired);
+ sb.append(", actual=").append(actual);
+ sb.append(", requested=").append(requested);
+ sb.append(", releasing=").append(releasing);
+ sb.append(", failed=").append(failed);
+ sb.append(", startFailed=").append(startFailed);
+ sb.append(", started=").append(started);
+ sb.append(", completed=").append(completed);
+ sb.append(", totalRequested=").append(totalRequested);
+ sb.append(", preempted=").append(preempted);
+ sb.append(", nodeFailed=").append(nodeFailed);
+ sb.append(", failedRecently=").append(failedRecently);
+ sb.append(", limitsExceeded=").append(limitsExceeded);
+ sb.append(", resourceRequirements=").append(resourceRequirements);
+ sb.append(", isAntiAffinePlacement=").append(isAntiAffinePlacement());
+ if (isAntiAffinePlacement()) {
+ sb.append(", pendingAntiAffineRequests=").append(pendingAntiAffineRequests);
+ sb.append(", outstandingAArequest=").append(outstandingAArequest);
+ }
+ sb.append(", failureMessage='").append(failureMessage).append('\'');
+ sb.append(", providerRole=").append(providerRole);
+ sb.append('}');
+ return sb.toString();
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
index c98f3bf..9a325d7 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
@@ -21,13 +21,18 @@ package org.apache.slider.server.appmaster.model.appstate
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import org.apache.hadoop.yarn.api.records.Container
+import org.apache.hadoop.yarn.api.records.NodeReport
+import org.apache.hadoop.yarn.api.records.NodeState
import org.apache.hadoop.yarn.client.api.AMRMClient
import org.apache.slider.providers.PlacementPolicy
import org.apache.slider.providers.ProviderRole
import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
import org.apache.slider.server.appmaster.model.mock.MockFactory
+import org.apache.slider.server.appmaster.model.mock.MockNodeReport
import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.state.AppState
import org.apache.slider.server.appmaster.state.AppStateBindingInfo
import org.apache.slider.server.appmaster.state.ContainerAssignment
import org.apache.slider.server.appmaster.state.NodeMap
@@ -55,6 +60,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
null)
RoleStatus aaRole
+ private int NODES = 3
@Override
AppStateBindingInfo buildBindingInfo() {
@@ -73,6 +79,11 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
aaRole = lookupRole(AAROLE.name)
}
+ @Override
+ MockYarnEngine createYarnEngine() {
+ new MockYarnEngine(NODES, 8)
+ }
+
/**
* Get the single request of a list of operations; includes the check for the size
* @param ops operations list of size 1
@@ -87,7 +98,6 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
public void testAllocateAANoLabel() throws Throwable {
assert cloneNodemap().size() > 0
-
// want multiple instances, so there will be iterations
aaRole.desired = 2
@@ -111,7 +121,6 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
assert !hostInstance.canHost(aaRole.key, "")
assert !hostInstance.canHost(aaRole.key, null)
-
// assignment
assert assignments.size() == 1
@@ -205,7 +214,6 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
submitOperations(ops, [], ops2).size()
assert 1 == ops2.size()
assertAllContainersAA()
-
}
/**
@@ -241,17 +249,70 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
}
/**
- * Scan through all containers and assert that the assignment is AA
- * @param index role index
+ *
+ * @throws Throwable
*/
- void assertAllContainersAA(String index) {
- def nodemap = stateAccess.nodeInformationSnapshot
- nodemap.each { name, info ->
- def nodeEntry = info.entries[index]
- assert nodeEntry == null ||
- (nodeEntry.live -nodeEntry.releasing + nodeEntry.starting) <= 1 ,
- "too many instances on node $name"
+ @Test
+ public void testAskForTooMany() throws Throwable {
+
+ describe("Ask for 1 more than the no of available nodes;" +
+ " expect the final request to be unsatisfied until the cluster changes size")
+ //more than expected
+ aaRole.desired = NODES + 1
+ List<AbstractRMOperation > operations = appState.reviewRequestAndReleaseNodes()
+ assert aaRole.AARequestOutstanding
+ assert NODES == aaRole.pendingAntiAffineRequests
+ for (int i = 0; i < NODES; i++) {
+ def iter = "Iteration $i role = $aaRole"
+ log.info(iter)
+ List<AbstractRMOperation > operationsOut = []
+ assert 1 == submitOperations(operations, [], operationsOut).size(), iter
+ operations = operationsOut
+ if (i + 1 < NODES) {
+ assert operations.size() == 2
+ } else {
+ assert operations.size() == 1
+ }
+ assertAllContainersAA()
}
+ // expect an outstanding AA request to be unsatisfied
+ assert aaRole.actual < aaRole.desired
+ assert !aaRole.requested
+ assert !aaRole.AARequestOutstanding
+ List<Container> allocatedContainers = engine.execute(operations, [])
+ assert 0 == allocatedContainers.size()
+ // in a review now, no more requests can be generated, as there is no space for AA placements,
+ // even though there is cluster capacity
+ assert 0 == appState.reviewRequestAndReleaseNodes().size()
+
+ // now do a node update (this doesn't touch the YARN engine; the node isn't really there)
+ def outcome = addNewNode()
+ assert cloneNodemap().size() == NODES + 1
+ assert outcome.clusterChanged
+ // no active calls to empty
+ assert outcome.operations.empty
+ assert 1 == appState.reviewRequestAndReleaseNodes().size()
+ }
+
+ protected AppState.NodeUpdatedOutcome addNewNode() {
+ NodeReport report = new MockNodeReport("four", NodeState.RUNNING) as NodeReport
+ appState.onNodesUpdated([report])
}
+ @Test
+ public void testClusterSizeChangesDuringRequestSequence() throws Throwable {
+ describe("Change the cluster size where the cluster size changes during a test sequence.")
+ aaRole.desired = NODES + 1
+ List<AbstractRMOperation> operations = appState.reviewRequestAndReleaseNodes()
+ assert aaRole.AARequestOutstanding
+ assert NODES == aaRole.pendingAntiAffineRequests
+ def outcome = addNewNode()
+ assert outcome.clusterChanged
+ // one call to cancel
+ assert 1 == outcome.operations.size()
+ // and on a review, one more to rebuild
+ assert 1 == appState.reviewRequestAndReleaseNodes().size()
+ }
+
+
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
index 9d0efa2..de85bba 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
@@ -44,9 +44,6 @@ class TestRoleHistoryAA extends SliderTestBase {
NodeMap nodeMap, gpuNodeMap
RoleHistory roleHistory = new MockRoleHistory(MockFactory.ROLES)
- AMRMClient.ContainerRequest requestContainer(RoleStatus roleStatus) {
- roleHistory.requestContainerForRole(roleStatus).issuedRequest
- }
@Override
void setup() {
@@ -159,7 +156,6 @@ class TestRoleHistoryAA extends SliderTestBase {
assert node1.canHost(2,"")
}
-
public List<NodeInstance> assertNoAvailableNodes(int role = 1, String label = "") {
return verifyResultSize(0, nodeMap.findAllNodesForRole(role, label))
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
index 3d472f1..4cb441d 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
@@ -279,7 +279,7 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
* @return a list of roles
*/
public List<RoleInstance> createAndSubmitNodes() {
- return createAndSubmitNodes([])
+ return createAndSubmitNodes([], [])
}
/**
@@ -288,9 +288,10 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
* @return a list of roles allocated
*/
public List<RoleInstance> createAndSubmitNodes(
- List<ContainerId> containerIds) {
+ List<ContainerId> containerIds,
+ List<AbstractRMOperation> operationsOut = []) {
List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
- return submitOperations(ops, containerIds)
+ return submitOperations(ops, containerIds, operationsOut)
}
/**
@@ -398,4 +399,18 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
assert 1 == ops.size()
getRequest(ops, 0)
}
+
+ /**
+ * Scan through all containers and assert that the assignment is AA
+ * @param index role index
+ */
+ void assertAllContainersAA(String index) {
+ def nodemap = stateAccess.nodeInformationSnapshot
+ nodemap.each { name, info ->
+ def nodeEntry = info.entries[index]
+ assert nodeEntry == null ||
+ (nodeEntry.live - nodeEntry.releasing + nodeEntry.starting) <= 1,
+ "too many instances on node $name"
+ }
+ }
}
[04/12] incubator-slider git commit: javadoc tweak on
createZookeepernode
Posted by st...@apache.org.
javadoc tweak on createZookeepernode
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/c4e73291
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/c4e73291
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/c4e73291
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: c4e7329152283cb3183fe7de859382daf6aad519
Parents: 618782a
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 13:57:37 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 13:57:37 2015 +0000
----------------------------------------------------------------------
.../main/java/org/apache/slider/client/SliderClient.java | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/c4e73291/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
index eeda1e3..59e6848 100644
--- a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
+++ b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
@@ -503,6 +503,13 @@ public class SliderClient extends AbstractSliderLaunchedService implements RunSe
/**
* Create the zookeeper node associated with the calling user and the cluster
+ *
+ * @param clusterName slider application name
+ * @param nameOnly should the name only be created (i.e. don't create ZK node)
+ * @return the path, using the policy implemented in
+ * {@link ZKIntegration#mkClusterPath(String, String)}
+ * @throws YarnException
+ * @throws IOException
*/
@VisibleForTesting
public String createZookeeperNode(String clusterName, Boolean nameOnly) throws YarnException, IOException {
@@ -522,7 +529,8 @@ public class SliderClient extends AbstractSliderLaunchedService implements RunSe
* -throwing exceptions on any failure
* @param clusterName cluster name
* @param nameOnly create the path, not the node
- * @return the path, with the node created
+ * @return the path, using the policy implemented in
+ * {@link ZKIntegration#mkClusterPath(String, String)}
* @throws YarnException
* @throws IOException
* @throws KeeperException
[07/12] incubator-slider git commit: SLIDER-967 tests on rolehistory
& nodemap. low-level tests work,
but the app state ones aren't tagging requested nodes as unavailable
Posted by st...@apache.org.
SLIDER-967 tests on rolehistory & nodemap. low-level tests work, but the app state ones aren't tagging requested nodes as unavailable
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/a7ba72e0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/a7ba72e0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/a7ba72e0
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: a7ba72e0ede14b70806b44d61d81bc4357052dbc
Parents: 7899f59
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 12:03:11 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 12:03:11 2015 +0000
----------------------------------------------------------------------
.../slider/api/types/NodeEntryInformation.java | 18 +++++
.../server/appmaster/state/NodeInstance.java | 16 +++-
.../slider/server/appmaster/state/NodeMap.java | 1 +
.../server/appmaster/state/RoleHistory.java | 2 +-
.../appstate/TestMockAppStateAAPlacement.groovy | 18 ++++-
.../TestMockAppStateContainerFailure.groovy | 2 +-
.../model/history/TestRoleHistoryAA.groovy | 81 +++++++++++++++++---
.../model/mock/BaseMockAppStateTest.groovy | 4 +-
8 files changed, 125 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java b/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java
index 482b0c7..15b57b0 100644
--- a/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java
+++ b/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java
@@ -58,4 +58,22 @@ public class NodeEntryInformation {
/** number of starting instances */
public int starting;
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder(
+ "NodeEntryInformation{");
+ sb.append("priority=").append(priority);
+ sb.append(", live=").append(live);
+ sb.append(", requested=").append(requested);
+ sb.append(", releasing=").append(releasing);
+ sb.append(", starting=").append(starting);
+ sb.append(", failed=").append(failed);
+ sb.append(", failedRecently=").append(failedRecently);
+ sb.append(", startFailed=").append(startFailed);
+ sb.append(", preempted=").append(preempted);
+ sb.append(", lastUsed=").append(lastUsed);
+ sb.append('}');
+ return sb.toString();
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
index ebd9d5a..8110bff 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
@@ -205,7 +205,6 @@ public class NodeInstance {
nodeEntries.add(nodeEntry);
}
-
/**
* run through each entry; gc'ing & removing old ones that don't have
* a recent failure count (we care about those)
@@ -381,5 +380,20 @@ public class NodeInstance {
return activeRight - activeLeft;
}
}
+ /**
+ * A comparator for sorting entries alphabetically
+ */
+ public static class CompareNames implements Comparator<NodeInstance>,
+ Serializable {
+
+ public CompareNames() {
+ }
+
+ @Override
+ public int compare(NodeInstance left, NodeInstance right) {
+ return left.hostname.compareTo(right.hostname);
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
index aea48b3..23411ca 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
@@ -155,6 +155,7 @@ public class NodeMap extends HashMap<String, NodeInstance> {
nodes.add(instance);
}
}
+ Collections.sort(nodes, new NodeInstance.CompareNames());
return nodes;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index 2ca5367..8a840fc 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -512,7 +512,7 @@ public class RoleHistory {
}
/**
- * Get a possibly emtpy list of suggested nodes for a role.
+ * Get a possibly empty list of suggested nodes for a role.
* @param id role ID
* @return list
*/
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
index c7f59e3..64c0362 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
@@ -30,6 +30,7 @@ import org.apache.slider.server.appmaster.model.mock.MockRoles
import org.apache.slider.server.appmaster.operations.AbstractRMOperation
import org.apache.slider.server.appmaster.state.AppStateBindingInfo
import org.apache.slider.server.appmaster.state.ContainerAssignment
+import org.apache.slider.server.appmaster.state.NodeInstance
import org.apache.slider.server.appmaster.state.RoleInstance
import org.apache.slider.server.appmaster.state.RoleStatus
import org.junit.Test
@@ -105,6 +106,13 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
List<AbstractRMOperation> operations = []
appState.onContainersAllocated([allocated], assignments, operations)
+ def host = allocated.nodeId.host
+ def hostInstance = nodemap.get(host)
+ assert hostInstance.get(aaRole.key).starting == 1
+ assert !hostInstance.canHost(aaRole.key, "")
+ assert !hostInstance.canHost(aaRole.key, null)
+
+
// assignment
assert assignments.size() == 1
@@ -115,6 +123,10 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
// we also expect a new allocation request to have been issued
def req2 = getRequest(operations, 1)
+ assert req2.nodes.size() == engine.cluster.clusterSize - 1
+
+ assert !req2.nodes.contains(host)
+ assert !request.relaxLocality
// verify the pending couner is down
assert 0L == aaRole.pendingAntiAffineRequests
@@ -149,11 +161,13 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
assert ops2.empty
assert aaRole.pendingAntiAffineRequests == 2
+ assertAllContainersAA()
// next iter
assert 1 == submitOperations(ops, [], ops2).size()
assert 2 == ops2.size()
assert aaRole.pendingAntiAffineRequests == 1
+ assertAllContainersAA()
assert 0 == appState.reviewRequestAndReleaseNodes().size()
// now trigger the next execution cycle
@@ -187,6 +201,8 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
// next iter
submitOperations(ops, [], ops2).size()
assert 1 == ops2.size()
+ assertAllContainersAA()
+
}
/**
@@ -230,7 +246,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
nodemap.each { name, info ->
def nodeEntry = info.entries[index]
assert nodeEntry == null ||
- (nodeEntry.live + nodeEntry.starting + nodeEntry.releasing) <= 1 ,
+ (nodeEntry.live -nodeEntry.releasing + nodeEntry.starting) <= 1 ,
"too many instances on node $name"
}
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
index 5b24a59..3235827 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
@@ -58,7 +58,7 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest
*/
@Override
MockYarnEngine createYarnEngine() {
- return new MockYarnEngine(8000, 4)
+ return new MockYarnEngine(4, 8000)
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
index fdbc3b4..9d0efa2 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
@@ -58,32 +58,40 @@ class TestRoleHistoryAA extends SliderTestBase {
@Test
public void testFindNodesInFullCluster() throws Throwable {
// all three will surface at first
- assertResultSize(3, nodeMap.findAllNodesForRole(1, ""))
+ verifyResultSize(3, nodeMap.findAllNodesForRole(1, ""))
}
@Test
public void testFindNodesInUnhealthyCluster() throws Throwable {
// all three will surface at first
- nodeMap.get("one").updateNode(new MockNodeReport("one",NodeState.UNHEALTHY))
- assertResultSize(2, nodeMap.findAllNodesForRole(1, ""))
+ markNodeOneUnhealthy()
+ verifyResultSize(2, nodeMap.findAllNodesForRole(1, ""))
+ }
+
+ public boolean markNodeOneUnhealthy() {
+ return setNodeState(nodeMap.get("one"), NodeState.UNHEALTHY)
+ }
+
+ protected boolean setNodeState(NodeInstance node, NodeState state) {
+ node.updateNode(new MockNodeReport(node.hostname, state))
}
@Test
public void testFindNoNodesWrongLabel() throws Throwable {
// all three will surface at first
- assertResultSize(0, nodeMap.findAllNodesForRole(1, "GPU"))
+ verifyResultSize(0, nodeMap.findAllNodesForRole(1, "GPU"))
}
@Test
public void testFindNoNodesRightLabel() throws Throwable {
// all three will surface at first
- assertResultSize(3, gpuNodeMap.findAllNodesForRole(1, "GPU"))
+ verifyResultSize(3, gpuNodeMap.findAllNodesForRole(1, "GPU"))
}
@Test
public void testFindNoNodesNoLabel() throws Throwable {
// all three will surface at first
- assertResultSize(3, gpuNodeMap.findAllNodesForRole(1, ""))
+ verifyResultSize(3, gpuNodeMap.findAllNodesForRole(1, ""))
}
@Test
@@ -92,7 +100,7 @@ class TestRoleHistoryAA extends SliderTestBase {
applyToNodeEntries(nodeMap) {
NodeEntry it -> it.request()
}
- assertResultSize(0, nodeMap.findAllNodesForRole(1, ""))
+ assertNoAvailableNodes(1)
}
@Test
@@ -101,14 +109,67 @@ class TestRoleHistoryAA extends SliderTestBase {
applyToNodeEntries(nodeMap) {
NodeEntry it -> it.request()
}
- assertResultSize(0, nodeMap.findAllNodesForRole(1, ""))
+ assertNoAvailableNodes(1)
+ }
+
+ /**
+ * Tag all nodes as starting, then walk one through a bit
+ * more of its lifecycle
+ */
+ @Test
+ public void testFindNoNodesLifecycle() throws Throwable {
+ // all three will surface at first
+ applyToNodeEntries(nodeMap) {
+ NodeEntry it -> it.onStarting()
+ }
+ assertNoAvailableNodes(1)
+
+ // walk one of the nodes through the lifecycle
+ def node1 = nodeMap.get("one")
+ assert !node1.canHost(1,"")
+ node1.get(1).onStartCompleted()
+ assert !node1.canHost(1,"")
+ assertNoAvailableNodes()
+ node1.get(1).release()
+ assert node1.canHost(1,"")
+ def list2 = verifyResultSize(1, nodeMap.findAllNodesForRole(1, ""))
+ assert list2[0].hostname == "one"
+
+ // now tag that node as unhealthy and expect it to go away
+ markNodeOneUnhealthy()
+ assertNoAvailableNodes()
+ }
+
+ @Test
+ public void testRolesIndependent() throws Throwable {
+ def node1 = nodeMap.get("one")
+ def role1 = node1.getOrCreate(1)
+ def role2 = node1.getOrCreate(2)
+ nodeMap.values().each {
+ it.updateNode(new MockNodeReport("", NodeState.UNHEALTHY))
+ }
+ assertNoAvailableNodes(1)
+ assertNoAvailableNodes(2)
+ assert setNodeState(node1, NodeState.RUNNING)
+ // tag role 1 as busy
+ role1.onStarting()
+ assertNoAvailableNodes(1)
+
+ verifyResultSize(1, nodeMap.findAllNodesForRole(2, ""))
+ assert node1.canHost(2,"")
+ }
+
+
+ public List<NodeInstance> assertNoAvailableNodes(int role = 1, String label = "") {
+ return verifyResultSize(0, nodeMap.findAllNodesForRole(role, label))
}
- def assertResultSize(int size, List<NodeInstance> list) {
+ List<NodeInstance> verifyResultSize(int size, List<NodeInstance> list) {
if (list.size() != size) {
- list.each { log.error(it.toFullString())}
+ list.each { log.error(it.toFullString()) }
}
assert size == list.size()
+ list
}
def applyToNodeEntries(Collection<NodeInstance> list, Closure cl) {
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
index e1660ee..3d472f1 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
@@ -69,10 +69,9 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
* @return
*/
public MockYarnEngine createYarnEngine() {
- return new MockYarnEngine(64, 1)
+ return new MockYarnEngine(8, 8)
}
-
@Override
void setup() {
super.setup()
@@ -89,7 +88,6 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
*/
void initApp(){
String historyDirName = testName;
- YarnConfiguration conf = SliderUtils.createConfiguration()
applicationId = new MockApplicationId(id: 1, clusterTimestamp: 0)
applicationAttemptId = new MockApplicationAttemptId(
applicationId: applicationId,
[06/12] incubator-slider git commit: SLIDER-967 Use nodemap to build
up location restrictions on AA placement. This is the core of the AA
placement algorithm: it finds nodes that are free to use.
Posted by st...@apache.org.
SLIDER-967 Use nodemap to build up location restrictions on AA placement.
This is the core of the AA placement algorithm: it finds nodes that are free to use.
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/7899f59a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/7899f59a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/7899f59a
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 7899f59a1cf12ae88775dcdd85a712f96cd6eb7c
Parents: 856ab84
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 20:11:50 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 20:11:50 2015 +0000
----------------------------------------------------------------------
.../slider/api/proto/RestTypeMarshalling.java | 10 +-
.../slider/api/types/NodeInformation.java | 4 +-
.../server/appmaster/SliderAppMaster.java | 6 +-
.../server/appmaster/rpc/SliderIPCService.java | 1 -
.../slider/server/appmaster/state/AppState.java | 91 ++++++++-------
.../server/appmaster/state/NodeEntry.java | 2 +-
.../server/appmaster/state/NodeInstance.java | 11 +-
.../slider/server/appmaster/state/NodeMap.java | 4 +-
.../appmaster/state/OutstandingRequest.java | 51 ++++++++-
.../state/OutstandingRequestTracker.java | 55 +++++++++
.../server/appmaster/state/RoleHistory.java | 112 +++++++++++++++++--
.../server/appmaster/state/RoleStatus.java | 26 ++++-
.../appstate/TestMockAppStateAAPlacement.groovy | 31 ++++-
.../model/history/TestRoleHistoryAA.groovy | 25 +++--
...stRoleHistoryFindNodesForNewInstances.groovy | 20 ++--
...tRoleHistoryOutstandingRequestTracker.groovy | 30 ++++-
.../TestRoleHistoryRequestTracking.groovy | 12 +-
.../model/mock/BaseMockAppStateTest.groovy | 4 +
18 files changed, 397 insertions(+), 98 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java b/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java
index b7985e6..85a8358 100644
--- a/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java
+++ b/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java
@@ -38,6 +38,8 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
/**
@@ -160,8 +162,8 @@ public class RestTypeMarshalling {
builder.setLabels(info.labels);
}
- List<NodeEntryInformation> entries = info.entries;
- if (entries != null) {
+ if (info.entries != null) {
+ Collection<NodeEntryInformation> entries = info.entries.values();
for (NodeEntryInformation entry : entries) {
Messages.NodeEntryInformationProto.Builder node =
Messages.NodeEntryInformationProto.newBuilder();
@@ -192,7 +194,7 @@ public class RestTypeMarshalling {
info.state = wire.getState();
List<Messages.NodeEntryInformationProto> entriesList = wire.getEntriesList();
if (entriesList != null) {
- info.entries = new ArrayList<>(entriesList.size());
+ info.entries = new HashMap<>(entriesList.size());
for (Messages.NodeEntryInformationProto entry : entriesList) {
NodeEntryInformation nei = new NodeEntryInformation();
nei.failed = entry.getFailed();
@@ -205,7 +207,7 @@ public class RestTypeMarshalling {
nei.releasing = entry.getReleasing();
nei.startFailed = entry.getStartFailed();
nei.starting = entry.getStarting();
- info.entries.add(nei);
+ info.entries.put(Integer.toString(nei.priority), nei);
}
}
return info;
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java b/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java
index 049ee52..edf7e21 100644
--- a/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java
+++ b/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java
@@ -22,7 +22,9 @@ import org.codehaus.jackson.annotate.JsonIgnoreProperties;
import org.codehaus.jackson.map.annotate.JsonSerialize;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
/**
* Serialized node information. Must be kept in sync with the protobuf equivalent.
@@ -38,5 +40,5 @@ public class NodeInformation {
public long lastUpdated;
public String rackName;
public String state;
- public List<NodeEntryInformation> entries = new ArrayList<>();
+ public Map<String, NodeEntryInformation> entries = new HashMap<>();
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index d74688b..171451e 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -1826,7 +1826,6 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
*/
private void releaseAllContainers() {
List<AbstractRMOperation> operations = appState.releaseAllContainers();
- providerRMOperationHandler.execute(operations);
//now apply the operations
execute(operations);
}
@@ -1852,7 +1851,10 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
LOG_YARN.info("onNodesUpdated({})", updatedNodes.size());
log.info("Updated nodes {}", updatedNodes);
// Check if any nodes are lost or revived and update state accordingly
- appState.onNodesUpdated(updatedNodes);
+ List<AbstractRMOperation> operations = appState.onNodesUpdated(updatedNodes);
+ execute(operations);
+ // trigger a review of requests and releases, whether or not there were operations
+ reviewRequestAndReleaseNodes("nodes updated");
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
index bb8f512..a983f53 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
@@ -448,7 +448,6 @@ public class SliderIPCService extends AbstractService
}
}
-
@Override
public Messages.WrappedJsonProto getModelDesired(Messages.EmptyPayloadProto request) throws IOException {
return lookupAggregateConf(MODEL_DESIRED);
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index 063a7fc..c960510 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -663,7 +663,6 @@ public class AppState {
return newRole;
}
-
/**
* Actions to perform when an instance definition is updated
* Currently:
@@ -678,9 +677,9 @@ public class AppState {
*
* @throws BadConfigException
*/
- private synchronized void onInstanceDefinitionUpdated() throws
- BadConfigException,
- IOException {
+ private synchronized void onInstanceDefinitionUpdated()
+ throws BadConfigException, IOException {
+
log.debug("Instance definition updated");
//note the time
snapshotTime = now();
@@ -1220,11 +1219,14 @@ public class AppState {
* Anti-Affine: the {@link RoleStatus#outstandingAArequest} is set here.
* This is where role history information will be used for placement decisions.
* @param role role
- * @return the container request to submit
+ * @return the container request to submit or null if there is none
*/
private AMRMClient.ContainerRequest createContainerRequest(RoleStatus role) {
incrementRequestCount(role);
OutstandingRequest request = roleHistory.requestContainerForRole(role);
+ if (request == null) {
+ return null;
+ }
if (role.isAntiAffinePlacement()) {
role.setOutstandingAArequest(request);
}
@@ -1426,12 +1428,13 @@ public class AppState {
* Handle node update from the RM. This syncs up the node map with the RM's view
* @param updatedNodes updated nodes
*/
- public synchronized void onNodesUpdated(List<NodeReport> updatedNodes) {
+ public synchronized List<AbstractRMOperation> onNodesUpdated(List<NodeReport> updatedNodes) {
boolean changed = roleHistory.onNodesUpdated(updatedNodes);
if (changed) {
- //TODO
log.error("TODO: cancel AA requests and re-review");
+ return cancelOutstandingAARequests();
}
+ return new ArrayList<>(0);
}
/**
@@ -1882,6 +1885,20 @@ public class AppState {
}
/**
+ * Cancel all outstanding anti-affine requests, in the role history and on each AA role.
+ * @return a (usually empty) list of cancel operations.
+ */
+ public synchronized List<AbstractRMOperation> cancelOutstandingAARequests() {
+ List<AbstractRMOperation> operations = roleHistory.cancelOutstandingAARequests();
+ for (RoleStatus roleStatus : roleStatusMap.values()) {
+ if (roleStatus.isAntiAffinePlacement()) {
+ roleStatus.cancelOutstandingAARequest();
+ }
+ }
+ return operations;
+ }
+
+ /**
* Look at the allocation status of one role, and trigger add/release
* actions if the number of desired role instances doesn't equal
* (actual + pending).
@@ -1900,7 +1917,6 @@ public class AppState {
long delta;
long expected;
String name = role.getName();
- boolean isAA = role.isAntiAffinePlacement();
synchronized (role) {
delta = role.getDelta();
expected = role.getDesired();
@@ -1920,19 +1936,24 @@ public class AppState {
if (delta > 0) {
// more workers needed than we have -ask for more
- log.info("{}: Asking for {} more nodes(s) for a total of {} ", name,
- delta, expected);
-
- // TODO: AA RH to help here by only allowing one request for an AA
+ log.info("{}: Asking for {} more nodes(s) for a total of {} ", name, delta, expected);
- if (isAA) {
- // build one only if there is none outstanding
+ if (role.isAntiAffinePlacement()) {
+ // build one only if there is none outstanding, the role history knows
+ // enough about the cluster to ask, and there is somewhere to place
+ // the node
if (role.getPendingAntiAffineRequests() == 0
+ && !role.isAARequestOutstanding()
&& roleHistory.canPlaceAANodes()) {
- log.info("Starting an anti-affine request sequence for {} nodes", delta);
// log the number outstanding
- role.incPendingAntiAffineRequests(delta - 1);
- addContainerRequest(operations, createContainerRequest(role));
+ AMRMClient.ContainerRequest request = createContainerRequest(role);
+ if (request != null) {
+ log.info("Starting an anti-affine request sequence for {} nodes", delta);
+ role.incPendingAntiAffineRequests(delta - 1);
+ addContainerRequest(operations, request);
+ } else {
+ log.info("No location for anti-affine request");
+ }
} else {
if (roleHistory.canPlaceAANodes()) {
log.info("Adding {} more anti-affine requests", delta);
@@ -1955,22 +1976,7 @@ public class AppState {
// reduce the number expected (i.e. subtract the delta)
long excess = -delta;
- if (isAA) {
- // there may be pending requests which can be cancelled here
- long pending = role.getPendingAntiAffineRequests();
- if (excess <= pending) {
- long outstanding = pending - excess;
- log.info("Cancelling {} pending AA allocations, leaving {}", excess, outstanding);
- role.setPendingAntiAffineRequests(outstanding);
- excess = 0;
- } else {
- // not enough
- log.info("Cancelling all pending AA allocations");
- role.setPendingAntiAffineRequests(0);
- excess -= pending;
- }
- }
- // how many requests are outstanding?
+ // how many requests are outstanding? for AA roles, this includes pending
long outstandingRequests = role.getRequested();
if (outstandingRequests > 0) {
// outstanding requests.
@@ -2052,15 +2058,22 @@ public class AppState {
return operations;
}
+ /**
+ * Add a container request if the request is non-null
+ * @param operations operations to add the entry to
+ * @param containerAsk what to ask for
+ */
private void addContainerRequest(List<AbstractRMOperation> operations,
AMRMClient.ContainerRequest containerAsk) {
- log.info("Container ask is {} and label = {}", containerAsk,
- containerAsk.getNodeLabelExpression());
- int askMemory = containerAsk.getCapability().getMemory();
- if (askMemory > this.containerMaxMemory) {
- log.warn("Memory requested: {} > max of {}", askMemory, containerMaxMemory);
+ if (containerAsk != null) {
+ log.info("Container ask is {} and label = {}", containerAsk,
+ containerAsk.getNodeLabelExpression());
+ int askMemory = containerAsk.getCapability().getMemory();
+ if (askMemory > this.containerMaxMemory) {
+ log.warn("Memory requested: {} > max of {}", askMemory, containerMaxMemory);
+ }
+ operations.add(new ContainerRequestOperation(containerAsk));
}
- operations.add(new ContainerRequestOperation(containerAsk));
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
index 6dae3c6..c180f88 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
@@ -90,7 +90,7 @@ public class NodeEntry {
* the number of instances > 1.
*/
public synchronized boolean isAvailable() {
- return getActive() == 0 && (requested == 0) && starting == 0;
+ return getActive() == 0 && requested == 0 && starting == 0;
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
index b805ffb..ebd9d5a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
@@ -22,10 +22,12 @@ import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.slider.api.types.NodeInformation;
import org.apache.slider.common.tools.Comparators;
+import org.apache.slider.common.tools.SliderUtils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
+import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
@@ -64,7 +66,8 @@ public class NodeInstance {
private String nodeLabels = "";
/**
- * The list of node entries of specific roles
+ * An unordered list of node entries of specific roles. There's nothing
+ * indexed so as to support sparser datastructures.
*/
private final List<NodeEntry> nodeEntries;
@@ -307,9 +310,9 @@ public class NodeInstance {
info.rackName = nodeReport.getRackName();
info.healthReport = nodeReport.getHealthReport();
}
- info.entries = new ArrayList<>(nodeEntries.size());
+ info.entries = new HashMap<>(nodeEntries.size());
for (NodeEntry nodeEntry : nodeEntries) {
- info.entries.add(nodeEntry.serialize());
+ info.entries.put(Integer.toString(nodeEntry.rolePriority), nodeEntry.serialize());
}
return info;
}
@@ -323,7 +326,7 @@ public class NodeInstance {
*/
public boolean canHost(int role, String label) {
return isOnline()
- && (label.isEmpty() || label.equals(nodeLabels)) // label match
+ && (SliderUtils.isUnset(label) || label.equals(nodeLabels)) // label match
&& (get(role) == null || get(role).isAvailable()); // no live role
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
index 2887c9e..aea48b3 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
@@ -146,9 +146,9 @@ public class NodeMap extends HashMap<String, NodeInstance> {
* Scan the current node map for all nodes capable of hosting an instance
* @param role role ID
* @param label label which must match, or "" for no label checks
- * @return a list of node instances matching the criteria.
+ * @return a possibly empty list of node instances matching the criteria.
*/
- public List<NodeInstance> findNodesForRole(int role, String label) {
+ public List<NodeInstance> findAllNodesForRole(int role, String label) {
List<NodeInstance> nodes = new ArrayList<>(size());
for (NodeInstance instance : values()) {
if (instance.canHost(role, label)) {
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
index a9d4b52..e211e7f 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
@@ -29,6 +29,7 @@ import org.apache.slider.server.appmaster.operations.CancelSingleRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
import java.util.List;
/**
@@ -53,6 +54,13 @@ public final class OutstandingRequest extends RoleHostnamePair {
public final NodeInstance node;
/**
+ * A list of all possible nodes to list in an AA request. For a non-AA
+ * request where {@link #node} is set, element 0 of the list is the same
+ * value.
+ */
+ public final List<NodeInstance> nodes = new ArrayList<>(1);
+
+ /**
* Optional label. This is cached as the request option (explicit-location + label) is forbidden,
* yet the label needs to be retained for escalation.
*/
@@ -95,6 +103,12 @@ public final class OutstandingRequest extends RoleHostnamePair {
private int priority = -1;
/**
+ * Is this an Anti-affine request which should be cancelled on
+ * a cluster resize?
+ */
+ private boolean antiAffine = false;
+
+ /**
* Create a request
* @param roleId role
* @param node node -can be null
@@ -103,6 +117,7 @@ public final class OutstandingRequest extends RoleHostnamePair {
NodeInstance node) {
super(roleId, node != null ? node.hostname : null);
this.node = node;
+ nodes.add(node);
}
/**
@@ -119,6 +134,19 @@ public final class OutstandingRequest extends RoleHostnamePair {
}
/**
+ * Create an Anti-affine request, including all listed nodes (there must be one)
+ * as targets.
+ * @param roleId role
+ * @param nodes list of nodes
+ */
+ public OutstandingRequest(int roleId, List<NodeInstance> nodes) {
+ super(roleId, nodes.get(0).hostname);
+ this.node = null;
+ this.antiAffine = true;
+ this.nodes.addAll(nodes);
+ }
+
+ /**
* Is the request located in the cluster, that is: does it have a node.
* @return true if a node instance was supplied in the constructor
*/
@@ -150,6 +178,14 @@ public final class OutstandingRequest extends RoleHostnamePair {
return priority;
}
+ public boolean isAntiAffine() {
+ return antiAffine;
+ }
+
+ public void setAntiAffine(boolean antiAffine) {
+ this.antiAffine = antiAffine;
+ }
+
/**
* Build a container request.
* <p>
@@ -183,7 +219,19 @@ public final class OutstandingRequest extends RoleHostnamePair {
NodeInstance target = this.node;
String nodeLabels;
- if (target != null) {
+ if (isAntiAffine()) {
+ hosts = new String[nodes.size()];
+ int c = 0;
+ for (NodeInstance nodeInstance : nodes) {
+ hosts[c] = nodeInstance.hostname;
+ c++;
+ }
+ log.info("Creating anti-affine request across {} nodes; first node = {}", c, hostname);
+ escalated = false;
+ mayEscalate = false;
+ relaxLocality = false;
+ nodeLabels = label;
+ } else if (target != null) {
// placed request. Hostname is used in request
hosts = new String[1];
hosts[0] = target.hostname;
@@ -215,7 +263,6 @@ public final class OutstandingRequest extends RoleHostnamePair {
pri,
relaxLocality,
nodeLabels);
-
validate();
return issuedRequest;
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
index ecdc07a..4209449 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
@@ -93,6 +93,23 @@ public class OutstandingRequestTracker {
}
/**
+ * Create a new Anti-affine request for the specific role
+ * <p>
+ * It is added to {@link #openRequests}
+ * <p>
+ * This does not update the node instance's role's request count
+ * @param role role index
+ * @param nodes list of suitable nodes
+ * @return a new request
+ */
+ public synchronized OutstandingRequest newAARequest(int role, List<NodeInstance> nodes) {
+ Preconditions.checkArgument(!nodes.isEmpty());
+ OutstandingRequest request = new OutstandingRequest(role, nodes);
+ openRequests.add(request);
+ return request;
+ }
+
+ /**
* Look up any outstanding request to a (role, hostname).
* @param role role index
* @param hostname hostname
@@ -364,6 +381,43 @@ public class OutstandingRequestTracker {
}
/**
+ * Cancel all outstanding AA requests from the lists of requests.
+ *
+ * This does not remove them from the role status; they must be reset
+ * by the caller.
+ *
+ */
+ @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
+ public synchronized List<AbstractRMOperation> cancelOutstandingAARequests() {
+
+ List<AbstractRMOperation> operations = new ArrayList<>();
+
+ // first, all placed requests
+ for (Map.Entry<RoleHostnamePair, OutstandingRequest> entry : placedRequests.entrySet()) {
+ OutstandingRequest outstandingRequest = entry.getValue();
+ synchronized (outstandingRequest) {
+ if (outstandingRequest.isAntiAffine()) {
+ // anti-affine placed request: cancel it
+ operations.add(outstandingRequest.createCancelOperation());
+ placedRequests.remove(entry.getKey());
+ }
+ }
+ }
+ // second, all open requests
+ for (OutstandingRequest outstandingRequest : openRequests) {
+ synchronized (outstandingRequest) {
+ if (outstandingRequest.isAntiAffine()) {
+ // anti-affine open request: cancel it
+ operations.add(outstandingRequest.createCancelOperation());
+ openRequests.remove(outstandingRequest);
+ }
+ }
+ }
+
+ return operations;
+ }
+
+ /**
* Extract a specific number of open requests for a role
* @param roleId role Id
* @param count count to extract
@@ -382,6 +436,7 @@ public class OutstandingRequestTracker {
}
return results;
}
+
/**
* Extract a specific number of placed requests for a role
* @param roleId role Id
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index df1f4e1..2ca5367 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -19,13 +19,13 @@
package org.apache.slider.server.appmaster.state;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.slider.api.types.NodeInformation;
import org.apache.slider.common.tools.SliderUtils;
import org.apache.slider.core.exceptions.BadConfigException;
@@ -46,11 +46,9 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
/**
@@ -549,7 +547,7 @@ public class RoleHistory {
* @return the instance, or null for none
*/
@VisibleForTesting
- public synchronized NodeInstance findNodeForNewInstance(RoleStatus role) {
+ public synchronized NodeInstance findRecentNodeForNewInstance(RoleStatus role) {
if (!role.isPlacementDesired()) {
// no data locality policy
return null;
@@ -591,6 +589,18 @@ public class RoleHistory {
}
/**
+ * Find all nodes which can currently host a new anti-affine instance of a role
+ * @param role role
+ * @return a possibly empty list of candidate node instances
+ */
+ @VisibleForTesting
+ public synchronized List<NodeInstance> findNodeForNewAAInstance(RoleStatus role) {
+ // all nodes that are live and can host the role; no attempt to exclude ones
+ // considered failing
+ return nodemap.findAllNodesForRole(role.getKey(), role.getLabelExpression());
+ }
+
+ /**
* Request an instance on a given node.
* An outstanding request is created & tracked, with the
* relevant node entry for that role updated.
@@ -615,15 +625,29 @@ public class RoleHistory {
* Find a node for a role and request an instance on that (or a location-less
* instance)
* @param role role status
- * @return a request ready to go
+ * @return a request ready to go, or null if this is an AA request and no
+ * location can be found.
*/
public synchronized OutstandingRequest requestContainerForRole(RoleStatus role) {
Resource resource = recordFactory.newResource();
role.copyResourceRequirements(resource);
- NodeInstance node = findNodeForNewInstance(role);
- // TODO AA -what if there are no suitable nodes?
- return requestInstanceOnNode(node, role, resource);
+ if (role.isAntiAffinePlacement()) {
+ // if a placement can be found, return it.
+ List<NodeInstance> nodes = findNodeForNewAAInstance(role);
+ if (!nodes.isEmpty()) {
+ OutstandingRequest outstanding
+ = outstandingRequests.newAARequest(role.getKey(), nodes);
+ outstanding.buildContainerRequest(resource, role, now());
+ return outstanding;
+ } else {
+ log.warn("No suitable location for {}", role.getName());
+ return null;
+ }
+ } else {
+ NodeInstance node = findRecentNodeForNewInstance(role);
+ return requestInstanceOnNode(node, role, resource);
+ }
}
/**
@@ -972,6 +996,26 @@ public class RoleHistory {
public List<AbstractRMOperation> escalateOutstandingRequests() {
return outstandingRequests.escalateOutstandingRequests(now());
}
+ /**
+ * Cancel all outstanding anti-affine requests held by the request tracker.
+ * @return a (usually empty) list of cancel operations.
+ */
+ public List<AbstractRMOperation> cancelOutstandingAARequests() {
+ return outstandingRequests.cancelOutstandingAARequests();
+ }
+
+ /**
+ * Cancel a number of outstanding requests for a role -that is, not
+ * actual containers, just requests for new ones.
+ * @param role role
+ * @param toCancel number to cancel
+ * @return a list of cancellable operations.
+ */
+ public List<AbstractRMOperation> cancelRequestsForRole(RoleStatus role, int toCancel) {
+ return role.isAntiAffinePlacement() ?
+ cancelRequestsForAARole(role, toCancel)
+ : cancelRequestsForSimpleRole(role, toCancel);
+ }
/**
* Build the list of requests to cancel from the outstanding list.
@@ -979,19 +1023,67 @@ public class RoleHistory {
* @param toCancel number to cancel
* @return a list of cancellable operations.
*/
- public synchronized List<AbstractRMOperation> cancelRequestsForRole(RoleStatus role, int toCancel) {
+ private synchronized List<AbstractRMOperation> cancelRequestsForSimpleRole(RoleStatus role, int toCancel) {
+ Preconditions.checkArgument(toCancel > 0,
+ "trying to cancel invalid number of requests: " + toCancel);
List<AbstractRMOperation> results = new ArrayList<>(toCancel);
// first scan through the unplaced request list to find all of a role
int roleId = role.getKey();
List<OutstandingRequest> requests =
outstandingRequests.extractOpenRequestsForRole(roleId, toCancel);
+ if (role.isAntiAffinePlacement()) {
+ // TODO: AA
+ // AA placement, so clear the role info
+ role.cancelOutstandingAARequest();
+ }
// are there any left?
int remaining = toCancel - requests.size();
// ask for some placed nodes
requests.addAll(outstandingRequests.extractPlacedRequestsForRole(roleId, remaining));
- // TODO AA: clear anything here?
+ // build cancellations
+ for (OutstandingRequest request : requests) {
+ results.add(request.createCancelOperation());
+ }
+ return results;
+ }
+
+ /**
+ * Build the list of requests to cancel for an AA role. This reduces the number
+ * of outstanding pending requests first, then cancels any active request,
+ * before finally asking for any placed containers
+ * @param role role
+ * @param toCancel number to cancel
+ * @return a list of cancellable operations.
+ */
+ private synchronized List<AbstractRMOperation> cancelRequestsForAARole(RoleStatus role, int toCancel) {
+ List<AbstractRMOperation> results = new ArrayList<>(toCancel);
+ int roleId = role.getKey();
+ List<OutstandingRequest> requests = new ArrayList<>(toCancel);
+ // there may be pending requests which can be cancelled here
+ long pending = role.getPendingAntiAffineRequests();
+ if (pending > 0) {
+ // there are some pending ones which can be cancelled first
+ long pendingToCancel = Math.min(pending, toCancel);
+ log.info("Cancelling {} pending AA allocations, leaving {}", toCancel,
+ pendingToCancel);
+ role.setPendingAntiAffineRequests(pending - pendingToCancel);
+ toCancel -= pendingToCancel;
+ }
+ if (toCancel > 0 && role.isAARequestOutstanding()) {
+ // not enough
+ log.info("Cancelling current AA request");
+ // find the single entry which may be running
+ requests = outstandingRequests.extractOpenRequestsForRole(roleId, toCancel);
+ role.cancelOutstandingAARequest();
+ toCancel--;
+ }
+
+ // ask for some excess nodes
+ if (toCancel > 0) {
+ requests.addAll(outstandingRequests.extractPlacedRequestsForRole(roleId, toCancel));
+ }
// build cancellations
for (OutstandingRequest request : requests) {
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
index 1beaddc..a14a84b 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
@@ -71,7 +71,7 @@ public final class RoleStatus implements Cloneable {
private final LongGauge pendingAntiAffineRequests = new LongGauge(0);
/** any pending AA request */
- private OutstandingRequest outstandingAArequest = null;
+ private volatile OutstandingRequest outstandingAArequest = null;
private String failureMessage = "";
@@ -155,8 +155,12 @@ public final class RoleStatus implements Cloneable {
return actual.decToFloor(1);
}
+ /**
+ * Get the request count. For AA roles, this includes pending ones.
+ * @return a count of requested containers
+ */
public long getRequested() {
- return requested.get();
+ return requested.get() + pendingAntiAffineRequests.get();
}
public long incRequested() {
@@ -209,6 +213,14 @@ public final class RoleStatus implements Cloneable {
}
/**
+ * Probe for whether an AA request is currently outstanding.
+ * @return true if there is an outstanding AA Request
+ */
+ public boolean isAARequestOutstanding() {
+ return outstandingAArequest != null;
+ }
+
+ /**
* Note that a role failed, text will
* be used in any diagnostics if an exception
* is later raised.
@@ -312,13 +324,21 @@ public final class RoleStatus implements Cloneable {
/**
* Complete the outstanding AA request (there's no check for one in progress, caller
* expected to have done that).
- * @return the number of outstanding requests
*/
public void completeOutstandingAARequest() {
setOutstandingAArequest(null);
}
/**
+ * Cancel any outstanding AA request. Harmless if the role is non-AA, or
+ * if there are no outstanding requests.
+ */
+ public void cancelOutstandingAARequest() {
+ setOutstandingAArequest(null);
+ setPendingAntiAffineRequests(0);
+ }
+
+ /**
* Get the number of roles we are short of.
* nodes released are ignored.
* @return the positive or negative number of roles to add/release.
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
index baf88dc..c7f59e3 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
@@ -93,8 +93,8 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
AMRMClient.ContainerRequest request = getSingleRequest(ops)
- assert request.relaxLocality
- assert request.nodes == null
+ assert !request.relaxLocality
+ assert request.nodes.size() == engine.cluster.clusterSize
assert request.racks == null
assert request.capability
@@ -131,6 +131,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
assert appState.onNodeManagerContainerStarted(container.id)
ops = appState.reviewRequestAndReleaseNodes()
assert ops.size() == 0
+ assertAllContainersAA();
}
@Test
@@ -160,6 +161,8 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
assert 1 == submitOperations(ops2, [], ops3).size()
assert 2 == ops3.size()
assert aaRole.pendingAntiAffineRequests == 0
+ assertAllContainersAA()
+
}
@Test
@@ -169,14 +172,17 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
getSingleRequest(ops)
assert aaRole.pendingAntiAffineRequests == 1
+ assert aaRole.AARequestOutstanding
// flex down so that the next request should be cancelled
aaRole.desired = 1
- // expect: no new reqests, pending count --
+ // expect: no new requests, pending count --
List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
assert ops2.empty
+ assert aaRole.AARequestOutstanding
assert aaRole.pendingAntiAffineRequests == 0
+ assertAllContainersAA()
// next iter
submitOperations(ops, [], ops2).size()
@@ -195,12 +201,14 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
getSingleRequest(ops)
assert aaRole.pendingAntiAffineRequests == 0
+ assert aaRole.AARequestOutstanding
// flex down so that the next request should be cancelled
aaRole.desired = 0
// expect: no new requests, pending count --
List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
assert aaRole.pendingAntiAffineRequests == 0
+ assert !aaRole.AARequestOutstanding
assert ops2.size() == 1
getSingleCancel(ops2)
@@ -209,5 +217,22 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
assert 1 == ops2.size()
}
+ void assertAllContainersAA() {
+ assertAllContainersAA(Integer.toString(aaRole.key))
+ }
+
+ /**
+ * Scan through all containers and assert that the assignment is AA
+ * @param index role index
+ */
+ void assertAllContainersAA(String index) {
+ def nodemap = stateAccess.nodeInformationSnapshot
+ nodemap.each { name, info ->
+ def nodeEntry = info.entries[index]
+ assert nodeEntry == null ||
+ (nodeEntry.live + nodeEntry.starting + nodeEntry.releasing) <= 1 ,
+ "too many instances on node $name"
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
index f99326f..fdbc3b4 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
@@ -21,10 +21,15 @@ package org.apache.slider.server.appmaster.model.history
import groovy.util.logging.Slf4j
import org.apache.hadoop.yarn.api.records.NodeReport
import org.apache.hadoop.yarn.api.records.NodeState
+import org.apache.hadoop.yarn.client.api.AMRMClient
+import org.apache.slider.server.appmaster.model.mock.MockFactory
import org.apache.slider.server.appmaster.model.mock.MockNodeReport
+import org.apache.slider.server.appmaster.model.mock.MockRoleHistory
import org.apache.slider.server.appmaster.state.NodeEntry
import org.apache.slider.server.appmaster.state.NodeInstance
import org.apache.slider.server.appmaster.state.NodeMap
+import org.apache.slider.server.appmaster.state.RoleHistory
+import org.apache.slider.server.appmaster.state.RoleStatus
import org.apache.slider.test.SliderTestBase
import org.junit.Test
@@ -37,44 +42,48 @@ class TestRoleHistoryAA extends SliderTestBase {
List<String> hostnames = ["one", "two", "three"]
NodeMap nodeMap, gpuNodeMap
+ RoleHistory roleHistory = new MockRoleHistory(MockFactory.ROLES)
+
+ AMRMClient.ContainerRequest requestContainer(RoleStatus roleStatus) {
+ roleHistory.requestContainerForRole(roleStatus).issuedRequest
+ }
@Override
void setup() {
super.setup()
nodeMap = createNodeMap(hostnames, NodeState.RUNNING)
gpuNodeMap = createNodeMap(hostnames, NodeState.RUNNING, "GPU")
-
}
@Test
public void testFindNodesInFullCluster() throws Throwable {
// all three will surface at first
- assertResultSize(3, nodeMap.findNodesForRole(1, ""))
+ assertResultSize(3, nodeMap.findAllNodesForRole(1, ""))
}
@Test
public void testFindNodesInUnhealthyCluster() throws Throwable {
// one node is marked unhealthy, so only two will surface
nodeMap.get("one").updateNode(new MockNodeReport("one",NodeState.UNHEALTHY))
- assertResultSize(2, nodeMap.findNodesForRole(1, ""))
+ assertResultSize(2, nodeMap.findAllNodesForRole(1, ""))
}
@Test
public void testFindNoNodesWrongLabel() throws Throwable {
// the nodes carry no label, so none will surface for a GPU-labelled request
- assertResultSize(0, nodeMap.findNodesForRole(1, "GPU"))
+ assertResultSize(0, nodeMap.findAllNodesForRole(1, "GPU"))
}
@Test
public void testFindNoNodesRightLabel() throws Throwable {
// all three will surface at first
- assertResultSize(3, gpuNodeMap.findNodesForRole(1, "GPU"))
+ assertResultSize(3, gpuNodeMap.findAllNodesForRole(1, "GPU"))
}
@Test
public void testFindNoNodesNoLabel() throws Throwable {
// all three will surface at first
- assertResultSize(3, gpuNodeMap.findNodesForRole(1, ""))
+ assertResultSize(3, gpuNodeMap.findAllNodesForRole(1, ""))
}
@Test
@@ -83,7 +92,7 @@ class TestRoleHistoryAA extends SliderTestBase {
applyToNodeEntries(nodeMap) {
NodeEntry it -> it.request()
}
- assertResultSize(0, nodeMap.findNodesForRole(1, ""))
+ assertResultSize(0, nodeMap.findAllNodesForRole(1, ""))
}
@Test
@@ -92,7 +101,7 @@ class TestRoleHistoryAA extends SliderTestBase {
applyToNodeEntries(nodeMap) {
NodeEntry it -> it.request()
}
- assertResultSize(0, nodeMap.findNodesForRole(1, ""))
+ assertResultSize(0, nodeMap.findAllNodesForRole(1, ""))
}
def assertResultSize(int size, List<NodeInstance> list) {
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
index 63aa6d2..f36724e 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
@@ -33,10 +33,8 @@ import org.junit.Test
/**
* Testing finding nodes for new instances.
- * These tests validate the (currently) suboptimal
- * behavior of not listing any known nodes when there
- * are none in the available list -even if there are nodes
- * known to be not running live instances in the cluster.
+ *
+ * This stresses the non-AA codepath
*/
@Slf4j
@CompileStatic
@@ -71,7 +69,7 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
public List<NodeInstance> findNodes(int count, RoleStatus roleStatus = roleStat) {
List < NodeInstance > found = [];
for (int i = 0; i < count; i++) {
- NodeInstance f = roleHistory.findNodeForNewInstance(roleStatus)
+ NodeInstance f = roleHistory.findRecentNodeForNewInstance(roleStatus)
if (f) {
found << f
};
@@ -81,17 +79,17 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
@Test
public void testFind1NodeR0() throws Throwable {
- NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+ NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
log.info("found: $found")
assert [age3Active0].contains(found)
}
@Test
public void testFind2NodeR0() throws Throwable {
- NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+ NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
log.info("found: $found")
assert [age2Active0, age3Active0].contains(found)
- NodeInstance found2 = roleHistory.findNodeForNewInstance(roleStat)
+ NodeInstance found2 = roleHistory.findRecentNodeForNewInstance(roleStat)
log.info("found: $found2")
assert [age2Active0, age3Active0].contains(found2)
assert found != found2;
@@ -100,7 +98,7 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
@Test
public void testFind3NodeR0ReturnsNull() throws Throwable {
assert 2== findNodes(2).size()
- NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+ NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
assert found == null;
}
@@ -124,7 +122,7 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
assert age2Active0.getActiveRoleInstances(0) != 0
age3Active0.get(0).onStartCompleted()
assert age3Active0.getActiveRoleInstances(0) != 0
- NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+ NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
log.info(found ?.toFullString())
assert found == null
}
@@ -148,7 +146,7 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
assert age2Active0.exceedsFailureThreshold(roleStat)
// get the role & expect age3 to be picked up, even though it is older
- NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+ NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
assert age3Active0.is(found)
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
index 1c99c04..7b389cd 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
@@ -240,6 +240,7 @@ class TestRoleHistoryOutstandingRequestTracker extends BaseMockAppStateTest {
def yarnRequest = req1.buildContainerRequest(resource, workerRole, 0)
assert (yarnRequest.nodeLabelExpression == null)
assert (!yarnRequest.relaxLocality)
+ // escalation
def yarnRequest2 = req1.escalate()
assert (yarnRequest2.nodeLabelExpression == WORKERS_LABEL)
assert (yarnRequest2.relaxLocality)
@@ -258,7 +259,6 @@ class TestRoleHistoryOutstandingRequestTracker extends BaseMockAppStateTest {
resource.virtualCores = 1
resource.memory = 48;
- def label = null
// initial request
def yarnRequest = req1.buildContainerRequest(resource, role0Status, 0)
assert yarnRequest.nodes != null
@@ -269,6 +269,34 @@ class TestRoleHistoryOutstandingRequestTracker extends BaseMockAppStateTest {
assert yarnRequest2.relaxLocality
}
+ @Test(expected = IllegalArgumentException)
+ public void testAARequestNoNodes() throws Throwable {
+ tracker.newAARequest(role0Status.key, [])
+ }
+
+ @Test
+ public void testAARequest() throws Throwable {
+ def role0 = role0Status.key
+ OutstandingRequest request = tracker.newAARequest(role0, [host1])
+ assert host1.hostname == request.hostname
+ assert !request.located
+ }
+
+ @Test
+ public void testAARequestPair() throws Throwable {
+ def role0 = role0Status.key
+ OutstandingRequest request = tracker.newAARequest(role0, [host1, host2])
+ assert host1.hostname == request.hostname
+ assert !request.located
+ def yarnRequest = request.buildContainerRequest(
+ role0Status.copyResourceRequirements(new MockResource(0, 0)),
+ role0Status,
+ 0)
+ assert !yarnRequest.relaxLocality
+ assert !request.mayEscalate()
+
+ assert yarnRequest.nodes.size() == 2
+ }
/**
* Create a new request (always against host1)
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
index 14ac32a..2f160cb 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
@@ -87,7 +87,7 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
@Test
public void testRequestedNodeOffList() throws Throwable {
- NodeInstance ni = roleHistory.findNodeForNewInstance(roleStatus)
+ NodeInstance ni = roleHistory.findRecentNodeForNewInstance(roleStatus)
assert age3Active0 == ni
assertListEquals([age2Active0], roleHistory.cloneRecentNodeList(0))
roleHistory.requestInstanceOnNode(ni,
@@ -106,7 +106,7 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
recordAsFailed(age2Active0, 0, 4)
assert age2Active0.isConsideredUnreliable(0, roleStatus.nodeFailureThreshold)
// expect to get a null node back
- NodeInstance ni = roleHistory.findNodeForNewInstance(roleStatus)
+ NodeInstance ni = roleHistory.findRecentNodeForNewInstance(roleStatus)
assert !ni
// which is translated to a no-location request
@@ -123,7 +123,7 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
assert !age3Active0.isConsideredUnreliable(0, roleStatus.nodeFailureThreshold)
assert !roleHistory.cloneRecentNodeList(0).empty
// looking for a node should now find one
- ni = roleHistory.findNodeForNewInstance(roleStatus)
+ ni = roleHistory.findRecentNodeForNewInstance(roleStatus)
assert ni == age3Active0
req = roleHistory.requestInstanceOnNode(ni, roleStatus, resource).issuedRequest
assert 1 == req.nodes.size()
@@ -153,13 +153,13 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
assert recentRole0.indexOf(age3Active0) < recentRole0.indexOf(age2Active0)
// the non-strict role has no suitable nodes
- assert null == roleHistory.findNodeForNewInstance(role0Status)
+ assert null == roleHistory.findRecentNodeForNewInstance(role0Status)
- def ni = roleHistory.findNodeForNewInstance(targetRole)
+ def ni = roleHistory.findRecentNodeForNewInstance(targetRole)
assert ni
- def ni2 = roleHistory.findNodeForNewInstance(targetRole)
+ def ni2 = roleHistory.findRecentNodeForNewInstance(targetRole)
assert ni2
assert ni != ni2
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
index 14e556a..e1660ee 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
@@ -42,8 +42,10 @@ import org.apache.slider.server.appmaster.state.ContainerAssignment
import org.apache.slider.server.appmaster.state.ContainerOutcome
import org.apache.slider.server.appmaster.state.NodeEntry
import org.apache.slider.server.appmaster.state.NodeInstance
+import org.apache.slider.server.appmaster.state.ProviderAppState
import org.apache.slider.server.appmaster.state.RoleInstance
import org.apache.slider.server.appmaster.state.RoleStatus
+import org.apache.slider.server.appmaster.state.StateAccessForProviders
import org.apache.slider.test.SliderTestBase
import org.junit.Before
@@ -59,6 +61,7 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
protected Path historyPath;
protected MockApplicationId applicationId;
protected MockApplicationAttemptId applicationAttemptId;
+ protected StateAccessForProviders stateAccess
/**
* Override point: called in setup() to create the YARN engine; can
@@ -97,6 +100,7 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
fs.delete(historyPath, true)
appState = new MockAppState()
appState.buildInstance(buildBindingInfo())
+ stateAccess = new ProviderAppState(testName, appState)
}
/**
[05/12] incubator-slider git commit: Merge branch 'develop' into
feature/SLIDER-82-pass-3.1
Posted by st...@apache.org.
Merge branch 'develop' into feature/SLIDER-82-pass-3.1
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/856ab847
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/856ab847
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/856ab847
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 856ab847ae1978c3c2a1ac600ed43c5465809e59
Parents: 2606192 c4e7329
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 13:58:03 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 13:58:03 2015 +0000
----------------------------------------------------------------------
pom.xml | 7 +++++++
slider-core/pom.xml | 1 +
.../main/java/org/apache/slider/client/SliderClient.java | 10 +++++++++-
3 files changed, 17 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/856ab847/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
----------------------------------------------------------------------
[11/12] incubator-slider git commit: Merge branch 'develop' into
feature/SLIDER-82-pass-3.1
Posted by st...@apache.org.
Merge branch 'develop' into feature/SLIDER-82-pass-3.1
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/6b13042c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/6b13042c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/6b13042c
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 6b13042c66c0f8d0914ee18f9abddd170f6ef512
Parents: 4754088 b201de8
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 14:02:33 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 14:02:33 2015 +0000
----------------------------------------------------------------------
pom.xml | 8 ++++++--
.../slider/server/appmaster/SliderAppMaster.java | 2 +-
.../server/appmaster/monkey/ChaosKillContainer.java | 16 +++++++---------
3 files changed, 14 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6b13042c/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
[10/12] incubator-slider git commit: SLIDER-981 Build and test slider
against Hadoop 2.7.2 RC0 -allow for staging URL to be defined on CLI
Posted by st...@apache.org.
SLIDER-981 Build and test slider against Hadoop 2.7.2 RC0 -allow for staging URL to be defined on CLI
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/b201de8a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/b201de8a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/b201de8a
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: b201de8ade387dbd5550b35060ca001d2af4362b
Parents: ca6f9ce
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 13:59:36 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 14:01:41 2015 +0000
----------------------------------------------------------------------
pom.xml | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/b201de8a/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 866c448..289993c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -234,17 +234,21 @@
<github.site.plugin.version>0.8</github.site.plugin.version>
<maven-site-plugin.skipDeploy>true</maven-site-plugin.skipDeploy>
<github.downloads.plugin.version>0.6</github.downloads.plugin.version>
+
+ <!-- override point for ASF staging/snapshot repos -->
+ <asf.staging>https://repository.apache.org/content/groups/staging/</asf.staging>
+ <asf.snapshots>https://repository.apache.org/content/repositories/snapshots/</asf.snapshots>
</properties>
<repositories>
<repository>
<id>ASF Staging</id>
- <url>https://repository.apache.org/content/groups/staging/</url>
+ <url>${asf.staging}</url>
</repository>
<repository>
<id>ASF Snapshots</id>
- <url>https://repository.apache.org/content/repositories/snapshots/</url>
+ <url>${asf.snapshots}</url>
<snapshots>
<enabled>true</enabled>
</snapshots>
[08/12] incubator-slider git commit: SLIDER-967 First AA placement
tests all working
Posted by st...@apache.org.
SLIDER-967 First AA placement tests all working
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/47540882
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/47540882
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/47540882
Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 47540882e15af473281eee477ea3253d5bef5e58
Parents: a7ba72e
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 13:43:21 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 13:43:21 2015 +0000
----------------------------------------------------------------------
.../slider/server/appmaster/state/AppState.java | 22 ++++++++++----------
.../server/appmaster/state/RoleHistory.java | 2 --
.../appstate/TestMockAppStateAAPlacement.groovy | 11 ++++++----
3 files changed, 18 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/47540882/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index c960510..0c66e25 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -2187,17 +2187,6 @@ public class AppState {
// add all requests to the operations list
operations.addAll(allocation.operations);
- // now for AA requests, add some more
- if (role.isAntiAffinePlacement()) {
- role.completeOutstandingAARequest();
- if (role.getPendingAntiAffineRequests() > 0) {
- // still an outstanding AA request: need to issue a new one.
- log.info("Asking for next container for AA role {}", roleName);
- role.decPendingAntiAffineRequests();
- addContainerRequest(operations, createContainerRequest(role));
- }
- }
-
//look for condition where we get more back than we asked
if (allocated > desired) {
log.info("Discarding surplus {} container {} on {}", roleName, cid,
@@ -2228,6 +2217,17 @@ public class AppState {
if (request != null) {
operations.add(request);
}
+ // now for AA requests, add some more
+ if (role.isAntiAffinePlacement()) {
+ role.completeOutstandingAARequest();
+ if (role.getPendingAntiAffineRequests() > 0) {
+ // still an outstanding AA request: need to issue a new one.
+ log.info("Asking for next container for AA role {}", roleName);
+ role.decPendingAntiAffineRequests();
+ addContainerRequest(operations, createContainerRequest(role));
+ }
+ }
+
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/47540882/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index 8a840fc..d7e6050 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -749,8 +749,6 @@ public class RoleHistory {
sortRecentNodeList(role);
}
}
- // TODO: AA placement: now request a new node
-
return outcome;
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/47540882/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
index 64c0362..c98f3bf 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
@@ -30,7 +30,7 @@ import org.apache.slider.server.appmaster.model.mock.MockRoles
import org.apache.slider.server.appmaster.operations.AbstractRMOperation
import org.apache.slider.server.appmaster.state.AppStateBindingInfo
import org.apache.slider.server.appmaster.state.ContainerAssignment
-import org.apache.slider.server.appmaster.state.NodeInstance
+import org.apache.slider.server.appmaster.state.NodeMap
import org.apache.slider.server.appmaster.state.RoleInstance
import org.apache.slider.server.appmaster.state.RoleStatus
import org.junit.Test
@@ -85,8 +85,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
@Test
public void testAllocateAANoLabel() throws Throwable {
- def nodemap = appState.roleHistory.cloneNodemap()
- assert nodemap.size() > 0
+ assert cloneNodemap().size() > 0
// want multiple instances, so there will be iterations
@@ -107,7 +106,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
appState.onContainersAllocated([allocated], assignments, operations)
def host = allocated.nodeId.host
- def hostInstance = nodemap.get(host)
+ def hostInstance = cloneNodemap().get(host)
assert hostInstance.get(aaRole.key).starting == 1
assert !hostInstance.canHost(aaRole.key, "")
assert !hostInstance.canHost(aaRole.key, null)
@@ -146,6 +145,10 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
assertAllContainersAA();
}
+ protected NodeMap cloneNodemap() {
+ appState.roleHistory.cloneNodemap()
+ }
+
@Test
public void testAllocateFlexUp() throws Throwable {
// want multiple instances, so there will be iterations