You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by st...@apache.org on 2015/11/12 19:15:16 UTC

[01/12] incubator-slider git commit: SLIDER-966 fix regressions shown up by tests, mostly in test setup, but one test, TestMockContainerResourceAllocations, showed that resource normalization stamped on vcore requirements

Repository: incubator-slider
Updated Branches:
  refs/heads/feature/SLIDER-82-pass-3.1 e0fb52916 -> 5a61b4cd8


SLIDER-966 fix regressions shown up by tests, mostly in test setup, but one test, TestMockContainerResourceAllocations, showed that resource normalization stamped on vcore requirements


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/89fd701b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/89fd701b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/89fd701b

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 89fd701bf1ab50b759babb01b075307ed6080c0d
Parents: e0fb529
Author: Steve Loughran <st...@apache.org>
Authored: Mon Nov 9 13:58:03 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Mon Nov 9 13:58:03 2015 +0000

----------------------------------------------------------------------
 .../state/AbstractClusterServices.java          | 24 +++++++++++-
 .../slider/server/appmaster/state/AppState.java | 15 +++++++-
 .../slider/client/TestClientBadArgs.groovy      | 39 ++++++++++++--------
 .../appmaster/model/mock/MockAppState.groovy    |  5 ++-
 4 files changed, 63 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/89fd701b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java
index eba8c38..54f384b 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AbstractClusterServices.java
@@ -18,6 +18,7 @@
 
 package org.apache.slider.server.appmaster.state;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
 
@@ -25,6 +26,10 @@ import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
  * Cluster services offered by the YARN infrastructure.
  */
 public abstract class AbstractClusterServices {
+
+  private final DefaultResourceCalculator
+      defaultResourceCalculator = new DefaultResourceCalculator();
+
   /**
    * Create a resource for requests
    * @return a resource which can be built up.
@@ -33,7 +38,24 @@ public abstract class AbstractClusterServices {
 
   public abstract Resource newResource(int memory, int cores);
 
+  /**
+   * Normalise memory, CPU and other resources according to the YARN AM-supplied
+   * values and the resource calculator in use (currently hard-coded to the
+   * {@link DefaultResourceCalculator}).
+   * Those resources which aren't normalized (currently: CPU) are left
+   * as is.
+   * @param resource resource requirements of a role
+   * @param minR minimum values of this queue
+   * @param maxR max values of this queue
+   * @return a normalized value.
+   */
   public Resource normalize(Resource resource, Resource minR, Resource maxR) {
-    return new DefaultResourceCalculator().normalize(resource, minR, maxR);
+    Preconditions.checkArgument(resource != null, "null resource");
+    Preconditions.checkArgument(minR != null, "null minR");
+    Preconditions.checkArgument(maxR != null, "null maxR");
+
+    Resource normalize = defaultResourceCalculator.normalize(resource, minR,
+        maxR, minR);
+    return newResource(normalize.getMemory(), resource.getVirtualCores());
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/89fd701b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index 21f59a1..f74fe98 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -21,6 +21,7 @@ package org.apache.slider.server.appmaster.state;
 import com.codahale.metrics.Metric;
 import com.codahale.metrics.MetricRegistry;
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -34,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
 import org.apache.hadoop.yarn.client.api.AMRMClient;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.util.resource.Resources;
 import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.api.ClusterDescriptionKeys;
 import org.apache.slider.api.ClusterDescriptionOperations;
@@ -322,6 +324,8 @@ public class AppState {
    */
   public AppState(AbstractClusterServices recordFactory,
       MetricsAndMonitoring metricsAndMonitoring) {
+    Preconditions.checkArgument(recordFactory != null, "null recordFactory");
+    Preconditions.checkArgument(metricsAndMonitoring != null, "null metricsAndMonitoring");
     this.recordFactory = recordFactory;
     this.metricsAndMonitoring = metricsAndMonitoring;
 
@@ -1310,7 +1314,16 @@ public class AppState {
                                      DEF_YARN_MEMORY,
                                      containerMaxMemory);
     capability.setMemory(ram);
-    return recordFactory.normalize(capability,minResource, maxResource);
+    log.debug("Component {} has RAM={}, vCores ={}", name, ram, cores);
+    Resource normalized = recordFactory.normalize(capability, minResource,
+        maxResource);
+    if (!Resources.equals(normalized, capability)) {
+      // resource requirements normalized to something other than asked for.
+      // LOG @ WARN so users can see why this is happening.
+      log.warn("Resource requirements of {} normalized" +
+              " from {} to {}", name, capability, normalized);
+    }
+    return normalized;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/89fd701b/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy b/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy
index 12736e3..1b074dc 100644
--- a/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/client/TestClientBadArgs.groovy
@@ -21,6 +21,7 @@ package org.apache.slider.client
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
 import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.slider.common.params.ActionRegistryArgs
 import org.apache.slider.common.params.Arguments
 import org.apache.slider.common.params.SliderActions
@@ -44,7 +45,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testNoAction() throws Throwable {
     launchExpectingException(SliderClient,
-                             new Configuration(),
+                             createTestConfig(),
                              "Usage: slider COMMAND",
                              [])
 
@@ -53,7 +54,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testUnknownAction() throws Throwable {
     launchExpectingException(SliderClient,
-                             new Configuration(),
+                             createTestConfig(),
                              "not-a-known-action",
                              ["not-a-known-action"])
   }
@@ -61,7 +62,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testActionWithoutOptions() throws Throwable {
     launchExpectingException(SliderClient,
-                             new Configuration(),
+                             createTestConfig(),
                              "Usage: slider build <application>",
                              [SliderActions.ACTION_BUILD])
   }
@@ -69,7 +70,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testActionWithoutEnoughArgs() throws Throwable {
     launchExpectingException(SliderClient,
-                             new Configuration(),
+                             createTestConfig(),
                              ErrorStrings.ERROR_NOT_ENOUGH_ARGUMENTS,
                              [SliderActions.ACTION_THAW])
   }
@@ -77,7 +78,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testActionWithTooManyArgs() throws Throwable {
     launchExpectingException(SliderClient,
-                             new Configuration(),
+                             createTestConfig(),
                              ErrorStrings.ERROR_TOO_MANY_ARGUMENTS,
                              [SliderActions.ACTION_HELP,
                              "hello, world"])
@@ -86,7 +87,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testBadImageArg() throws Throwable {
     launchExpectingException(SliderClient,
-                             new Configuration(),
+                             createTestConfig(),
                              "Unknown option: --image",
                             [SliderActions.ACTION_HELP,
                              Arguments.ARG_IMAGE])
@@ -95,7 +96,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testRegistryUsage() throws Throwable {
     def exception = launchExpectingException(SliderClient,
-        new Configuration(),
+        createTestConfig(),
         "org.apache.slider.core.exceptions.UsageException: Argument --name missing",
         [SliderActions.ACTION_REGISTRY])
     assert exception instanceof UsageException
@@ -105,7 +106,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testRegistryExportBadUsage1() throws Throwable {
     def exception = launchExpectingException(SliderClient,
-        new Configuration(),
+        createTestConfig(),
         "Expected a value after parameter --getexp",
         [SliderActions.ACTION_REGISTRY,
             Arguments.ARG_NAME,
@@ -118,7 +119,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testRegistryExportBadUsage2() throws Throwable {
     def exception = launchExpectingException(SliderClient,
-        new Configuration(),
+        createTestConfig(),
         "Expected a value after parameter --getexp",
         [SliderActions.ACTION_REGISTRY,
             Arguments.ARG_NAME,
@@ -132,7 +133,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testRegistryExportBadUsage3() throws Throwable {
     def exception = launchExpectingException(SliderClient,
-        new Configuration(),
+        createTestConfig(),
         "Usage: registry",
         [SliderActions.ACTION_REGISTRY,
             Arguments.ARG_NAME,
@@ -147,7 +148,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   @Test
   public void testUpgradeUsage() throws Throwable {
     def exception = launchExpectingException(SliderClient,
-        new Configuration(),
+        createTestConfig(),
         "org.apache.slider.core.exceptions.BadCommandArgumentsException: Not enough arguments for action: upgrade Expected minimum 1 but got 0",
         [SliderActions.ACTION_UPGRADE])
     assert exception instanceof BadCommandArgumentsException
@@ -158,7 +159,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   public void testUpgradeWithTemplateOptionOnly() throws Throwable {
     String appName = "test_hbase"
     def exception = launchExpectingException(SliderClient,
-        new Configuration(),
+        createTestConfig(),
         "BadCommandArgumentsException: Option --resources must be specified with option --template",
         [SliderActions.ACTION_UPGRADE,
             appName,
@@ -169,11 +170,17 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
     log.info(exception.toString())
   }
 
+  public Configuration createTestConfig() {
+    def configuration = new Configuration()
+    configuration.set(YarnConfiguration.RM_ADDRESS,  "127.0.0.1:8032")
+    return configuration
+  }
+
   @Test
   public void testUpgradeWithResourcesOptionOnly() throws Throwable {
     String appName = "test_hbase"
     def exception = launchExpectingException(SliderClient,
-        new Configuration(),
+        createTestConfig(),
         "BadCommandArgumentsException: Option --template must be specified with option --resources",
         [SliderActions.ACTION_UPGRADE,
             appName,
@@ -188,7 +195,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   public void testUpgradeWithTemplateResourcesAndContainersOption() throws Throwable {
     String appName = "test_hbase"
     def exception = launchExpectingException(SliderClient,
-        new Configuration(),
+        createTestConfig(),
         "BadCommandArgumentsException: Option --containers cannot be "
         + "specified with --template or --resources",
         [SliderActions.ACTION_UPGRADE,
@@ -208,7 +215,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   public void testUpgradeWithTemplateResourcesAndComponentsOption() throws Throwable {
     String appName = "test_hbase"
     def exception = launchExpectingException(SliderClient,
-        new Configuration(),
+        createTestConfig(),
         "BadCommandArgumentsException: Option --components cannot be "
         + "specified with --template or --resources",
         [SliderActions.ACTION_UPGRADE,
@@ -228,7 +235,7 @@ class TestClientBadArgs extends ServiceLauncherBaseTest {
   public void testCreateAppWithAddonPkgBadArg1() throws Throwable {
     //add on package without specifying add on package name
       def exception = launchExpectingException(SliderClient,
-          new Configuration(),
+          createTestConfig(),
           "Expected 2 values after --addon",
           [SliderActions.ACTION_CREATE,
               "cl1",

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/89fd701b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy
index 5565e6b..c041ce5 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockAppState.groovy
@@ -29,8 +29,10 @@ import org.apache.slider.server.appmaster.state.AppStateBindingInfo
 class MockAppState extends AppState {
   public static final int RM_MAX_RAM = 4096
   public static final int RM_MAX_CORES = 64
+
   public MockAppState(AbstractClusterServices recordFactory) {
     super(recordFactory, new MetricsAndMonitoring());
+    setContainerLimits(1, RM_MAX_RAM, 1, RM_MAX_CORES)
   }
 
   long time = 0;
@@ -39,8 +41,7 @@ class MockAppState extends AppState {
    * Instance with a mock record factory
    */
   public MockAppState() {
-    super(new MockClusterServices(), new MetricsAndMonitoring());
-    setContainerLimits(1, RM_MAX_RAM, 1, RM_MAX_CORES)
+    this(new MockClusterServices());
   }
 
   MockAppState(AppStateBindingInfo bindingInfo) {


[02/12] incubator-slider git commit: SLIDER-967 Use nodemap to build up location restrictions on AA placement

Posted by st...@apache.org.
SLIDER-967 Use nodemap to build up location restrictions on AA placement


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/2606192a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/2606192a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/2606192a

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 2606192ab0685fc267fcd5f3670e13b93a7a83b6
Parents: 89fd701
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 13:54:15 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 13:54:15 2015 +0000

----------------------------------------------------------------------
 .../slider/server/appmaster/state/AppState.java | 15 ++-
 .../server/appmaster/state/NodeInstance.java    | 74 +++++++++++++--
 .../slider/server/appmaster/state/NodeMap.java  | 44 ++++++++-
 .../appmaster/state/OutstandingRequest.java     | 67 +++----------
 .../state/OutstandingRequestTracker.java        | 23 ++---
 .../server/appmaster/state/RoleHistory.java     | 70 +++++++-------
 .../appmaster/state/RoleHostnamePair.java       | 75 +++++++++++++++
 .../server/appmaster/web/SliderAMWebApp.java    |  2 +-
 .../model/history/TestRoleHistoryAA.groovy      | 98 +++++++++++++++++++-
 .../TestRoleHistoryContainerEvents.groovy       |  5 +-
 .../appmaster/model/mock/MockNodeReport.groovy  | 34 +++++++
 11 files changed, 376 insertions(+), 131 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index f74fe98..063a7fc 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -1427,7 +1427,11 @@ public class AppState {
    * @param updatedNodes updated nodes
    */
   public synchronized void onNodesUpdated(List<NodeReport> updatedNodes) {
-    roleHistory.onNodesUpdated(updatedNodes);
+    boolean changed = roleHistory.onNodesUpdated(updatedNodes);
+    if (changed) {
+      //TODO
+      log.error("TODO: cancel AA requests and re-review");
+    }
   }
 
   /**
@@ -1923,13 +1927,18 @@ public class AppState {
 
       if (isAA) {
         // build one only if there is none outstanding
-        if (role.getPendingAntiAffineRequests() == 0) {
+        if (role.getPendingAntiAffineRequests() == 0
+            && roleHistory.canPlaceAANodes()) {
           log.info("Starting an anti-affine request sequence for {} nodes", delta);
           // log the number outstanding
           role.incPendingAntiAffineRequests(delta - 1);
           addContainerRequest(operations, createContainerRequest(role));
         } else {
-          log.info("Adding {} more anti-affine requests", delta);
+          if (roleHistory.canPlaceAANodes()) {
+            log.info("Adding {} more anti-affine requests", delta);
+          } else {
+            log.warn("Awaiting node map before generating node requests");
+          }
           role.incPendingAntiAffineRequests(delta);
         }
       } else {

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
index 7fc912d..b805ffb 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
@@ -20,16 +20,15 @@ package org.apache.slider.server.appmaster.state;
 
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
-import org.apache.slider.api.types.NodeEntryInformation;
 import org.apache.slider.api.types.NodeInformation;
 import org.apache.slider.common.tools.Comparators;
-import org.apache.slider.common.tools.SliderUtils;
 
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 import java.util.ListIterator;
+import java.util.Set;
 
 /**
  * A node instance -stores information about a node in the cluster.
@@ -46,6 +45,9 @@ public class NodeInstance {
    */
   private NodeState nodeState = NodeState.RUNNING;
 
+  /**
+   * Last node report. If null: none
+   */
   private NodeReport nodeReport = null;
 
   /**
@@ -53,6 +55,17 @@ public class NodeInstance {
    */
   private long nodeStateUpdateTime = 0;
 
+  /**
+   * Node labels.
+   *
+   * IMPORTANT: we assume that there is one label/node, which is the policy
+   * for Hadoop as of November 2015
+   */
+  private String nodeLabels = "";
+
+  /**
+   * The list of node entries of specific roles
+   */
   private final List<NodeEntry> nodeEntries;
 
   /**
@@ -64,18 +77,41 @@ public class NodeInstance {
     nodeEntries = new ArrayList<>(roles);
   }
 
-
   /**
-   * Update the node status
+   * Update the node status.
+   * The return value is true if the node state changed enough to
+   * trigger a re-evaluation of pending requests. That is, either a node
+   * became available when it was previously not, or the label changed
+   * on an available node.
+   *
+   * Transitions of a node from live to dead aren't treated as significant,
+   * nor label changes on a dead node.
+   *
    * @param report latest node report
-   * @return true if the node state changed
+   * @return true if the node state changed enough for a request evaluation.
    */
   public synchronized boolean updateNode(NodeReport report) {
+    nodeStateUpdateTime = report.getLastHealthReportTime();
     nodeReport = report;
     NodeState oldState = nodeState;
+    boolean oldStateUnusable = oldState.isUnusable();
     nodeState = report.getNodeState();
-    nodeStateUpdateTime = report.getLastHealthReportTime();
-    return nodeState != oldState;
+    boolean newUsable = !nodeState.isUnusable();
+    boolean nodeNowAvailable = oldStateUnusable && newUsable;
+    String labels = this.nodeLabels;
+    Set<String> newlabels = report.getNodeLabels();
+    if (newlabels != null && !newlabels.isEmpty()) {
+      nodeLabels = newlabels.iterator().next().trim();
+    } else {
+      nodeLabels = "";
+    }
+    return nodeNowAvailable
+        || newUsable && !this.nodeLabels.equals(labels);
+  }
+
+
+  public String getNodeLabels() {
+    return nodeLabels;
   }
 
   /**
@@ -130,6 +166,14 @@ public class NodeInstance {
   }
 
   /**
+   * Is the node considered online?
+   * @return true if the node state is not unusable
+   */
+  public boolean isOnline() {
+    return !nodeState.isUnusable();
+  }
+
+  /**
    * Query for a node being considered unreliable
    * @param role role key
    * @param threshold threshold above which a node is considered unreliable
@@ -137,7 +181,6 @@ public class NodeInstance {
    */
   public boolean isConsideredUnreliable(int role, int threshold) {
     NodeEntry entry = get(role);
-
     return entry != null && entry.getFailedRecently() > threshold;
   }
 
@@ -258,10 +301,10 @@ public class NodeInstance {
     // null-handling state constructor
     info.state = "" + nodeState;
     info.lastUpdated = nodeStateUpdateTime;
+    info.labels = nodeLabels;
     if (nodeReport != null) {
       info.httpAddress = nodeReport.getHttpAddress();
       info.rackName = nodeReport.getRackName();
-      info.labels = SliderUtils.join(nodeReport.getNodeLabels(), ", ", false);
       info.healthReport = nodeReport.getHealthReport();
     }
     info.entries = new ArrayList<>(nodeEntries.size());
@@ -272,6 +315,19 @@ public class NodeInstance {
   }
 
   /**
+   * Is this node instance a suitable candidate for the specific role?
+   * @param role role ID
+   * @param label label which must match, or "" for no label checks
+   * @return true if the node has space for this role, is running and the labels
+   * match.
+   */
+  public boolean canHost(int role, String label) {
+    return isOnline()
+        && (label.isEmpty() || label.equals(nodeLabels))   // label match
+        && (get(role) == null || get(role).isAvailable()); // no live role
+  }
+
+  /**
    * A comparator for sorting entries where the node is preferred over another.
    *
    * The exact algorithm may change: current policy is "most recent first", so sorted

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
index b631057..2887c9e 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
@@ -92,20 +92,24 @@ public class NodeMap extends HashMap<String, NodeInstance> {
     }
   }
 
-
   /**
-   * Update the node state
+   * Update the node state. Return true if the node state changed: either by
+   * being created, or by changing its internal state as defined
+   * by {@link NodeInstance#updateNode(NodeReport)}.
+   *
    * @param hostname host name
    * @param report latest node report
-   * @return the updated node.
+   * @return true if the node state changed enough for a request evaluation.
    */
   public boolean updateNode(String hostname, NodeReport report) {
-    return getOrCreate(hostname).updateNode(report);
+    boolean nodeExisted = get(hostname) != null;
+    boolean updated = getOrCreate(hostname).updateNode(report);
+    return updated || !nodeExisted;
   }
 
   /**
    * Clone point
-   * @return
+   * @return a shallow clone
    */
   @Override
   public Object clone() {
@@ -123,4 +127,34 @@ public class NodeMap extends HashMap<String, NodeInstance> {
       put(node.hostname, node);
     }
   }
+
+  /**
+   * Test helper: build or update a cluster from a list of node reports
+   * @param reports the list of reports
+   * @return true if this has been considered to have changed the cluster
+   */
+  @VisibleForTesting
+  public boolean buildOrUpdate(List<NodeReport> reports) {
+    boolean updated = false;
+    for (NodeReport report : reports) {
+      updated |= getOrCreate(report.getNodeId().getHost()).updateNode(report);
+    }
+    return updated;
+  }
+
+  /**
+   * Scan the current node map for all nodes capable of hosting an instance
+   * @param role role ID
+   * @param label label which must match, or "" for no label checks
+   * @return a list of node instances matching the criteria.
+   */
+  public List<NodeInstance> findNodesForRole(int role, String label) {
+    List<NodeInstance> nodes = new ArrayList<>(size());
+    for (NodeInstance instance : values()) {
+      if (instance.canHost(role, label)) {
+        nodes.add(instance);
+      }
+    }
+    return nodes;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
index 38bc96f..a9d4b52 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
@@ -43,24 +43,14 @@ import java.util.List;
  * instance constructed with (role, hostname) can be used to look up
  * a complete request instance in the {@link OutstandingRequestTracker} map
  */
-public final class OutstandingRequest {
+public final class OutstandingRequest extends RoleHostnamePair {
   protected static final Logger log =
     LoggerFactory.getLogger(OutstandingRequest.class);
 
   /**
-   * requested role
-   */
-  public final int roleId;
-
-  /**
    * Node the request is for -may be null
    */
   public final NodeInstance node;
-  
-  /**
-   * hostname -will be null if node==null
-   */
-  public final String hostname;
 
   /**
    * Optional label. This is cached as the request option (explicit-location + label) is forbidden,
@@ -111,9 +101,8 @@ public final class OutstandingRequest {
    */
   public OutstandingRequest(int roleId,
                             NodeInstance node) {
-    this.roleId = roleId;
+    super(roleId, node != null ? node.hostname : null);
     this.node = node;
-    this.hostname = node != null ? node.hostname : null;
   }
 
   /**
@@ -125,9 +114,8 @@ public final class OutstandingRequest {
    * @param hostname hostname
    */
   public OutstandingRequest(int roleId, String hostname) {
+    super(roleId, hostname);
     this.node = null;
-    this.roleId = roleId;
-    this.hostname = hostname;
   }
 
   /**
@@ -301,52 +289,13 @@ public final class OutstandingRequest {
     return issuedRequest != null && issuedRequest.getCapability().equals(resource);
   }
 
-  /**
-   * Equality is on hostname and role
-   * @param o other
-   * @return true on a match
-   */
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) {
-      return true;
-    }
-    if (o == null || getClass() != o.getClass()) {
-      return false;
-    }
-
-    OutstandingRequest request = (OutstandingRequest) o;
-
-    if (roleId != request.roleId) {
-      return false;
-    }
-    if (hostname != null
-        ? !hostname.equals(request.hostname)
-        : request.hostname != null) {
-      return false;
-    }
-    return true;
-  }
-
-  /**
-   * hash on hostname and role
-   * @return hash code
-   */
-  @Override
-  public int hashCode() {
-    int result = roleId;
-    result = 31 * result + (hostname != null ? hostname.hashCode() : 0);
-    return result;
-  }
-
   @Override
   public String toString() {
     int requestRoleId = ContainerPriority.extractRole(getPriority());
     boolean requestHasLocation = ContainerPriority.hasLocation(getPriority());
     final StringBuilder sb = new StringBuilder("OutstandingRequest{");
-    sb.append("roleId=").append(this.roleId);
+    sb.append(super.toString());
     sb.append(", node=").append(node);
-    sb.append(", hostname='").append(hostname).append('\'');
     sb.append(", hasLocation=").append(requestHasLocation);
     sb.append(", requestedTimeMillis=").append(requestedTimeMillis);
     sb.append(", mayEscalate=").append(mayEscalate);
@@ -367,7 +316,6 @@ public final class OutstandingRequest {
     return new CancelSingleRequest(issuedRequest);
   }
 
-
   /**
    * Valid if a node label expression specified on container request is valid or
    * not. Mimics the logic in AMRMClientImpl, so can be used for preflight checking
@@ -410,5 +358,12 @@ public final class OutstandingRequest {
     }
   }
 
+  /**
+   * Create a new role/hostname pair for indexing.
+   * @return a new index.
+   */
+  public RoleHostnamePair getIndex() {
+    return new RoleHostnamePair(roleId, hostname);
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
index a791826..ecdc07a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
@@ -35,10 +35,12 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.ListIterator;
 import java.util.Map;
+import java.util.Set;
 
 /**
  * Tracks outstanding requests made with a specific placement option.
@@ -62,8 +64,7 @@ public class OutstandingRequestTracker {
    */
   private final List<AbstractRMOperation> NO_REQUESTS = new ArrayList<>(0);
  
-  private Map<OutstandingRequest, OutstandingRequest> placedRequests =
-    new HashMap<>();
+  private Map<RoleHostnamePair, OutstandingRequest> placedRequests = new HashMap<>();
 
   /**
    * List of open requests; no specific details on them.
@@ -82,10 +83,9 @@ public class OutstandingRequestTracker {
    * @return a new request
    */
   public synchronized OutstandingRequest newRequest(NodeInstance instance, int role) {
-    OutstandingRequest request =
-      new OutstandingRequest(role, instance);
+    OutstandingRequest request = new OutstandingRequest(role, instance);
     if (request.isLocated()) {
-      placedRequests.put(request, request);
+      placedRequests.put(request.getIndex(), request);
     } else {
       openRequests.add(request);
     }
@@ -101,7 +101,7 @@ public class OutstandingRequestTracker {
   @VisibleForTesting
   public synchronized OutstandingRequest lookupPlacedRequest(int role, String hostname) {
     Preconditions.checkArgument(hostname != null, "null hostname");
-    return placedRequests.get(new OutstandingRequest(role, hostname));
+    return placedRequests.get(new RoleHostnamePair(role, hostname));
   }
 
   /**
@@ -294,12 +294,12 @@ public class OutstandingRequestTracker {
    */
   public synchronized List<NodeInstance> resetOutstandingRequests(int role) {
     List<NodeInstance> hosts = new ArrayList<>();
-    Iterator<Map.Entry<OutstandingRequest,OutstandingRequest>> iterator =
+    Iterator<Map.Entry<RoleHostnamePair, OutstandingRequest>> iterator =
       placedRequests.entrySet().iterator();
     while (iterator.hasNext()) {
-      Map.Entry<OutstandingRequest, OutstandingRequest> next =
+      Map.Entry<RoleHostnamePair, OutstandingRequest> next =
         iterator.next();
-      OutstandingRequest request = next.getKey();
+      OutstandingRequest request = next.getValue();
       if (request.roleId == role) {
         iterator.remove();
         request.completed();
@@ -390,9 +390,10 @@ public class OutstandingRequestTracker {
    */
   public synchronized List<OutstandingRequest> extractPlacedRequestsForRole(int roleId, int count) {
     List<OutstandingRequest> results = new ArrayList<>();
-    Iterator<OutstandingRequest> iterator = placedRequests.keySet().iterator();
+    Iterator<Map.Entry<RoleHostnamePair, OutstandingRequest>>
+        iterator = placedRequests.entrySet().iterator();
     while (iterator.hasNext() && count > 0) {
-      OutstandingRequest request = iterator.next();
+      OutstandingRequest request = iterator.next().getValue();
       if (request.roleId == roleId) {
         results.add(request);
         count--;

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index f8271a6..df1f4e1 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -109,12 +109,6 @@ public class RoleHistory {
   private Map<Integer, LinkedList<NodeInstance>> recentNodes;
 
   /**
-   * Track the failed nodes. Currently used to make wiser decision of container
-   * ask with/without locality. Has other potential uses as well.
-   */
-  private Set<String> failedNodes = new HashSet<>();
-
-  /**
    * Instantiate
    * @param roles initial role list
    * @param recordFactory yarn record factory
@@ -137,7 +131,6 @@ public class RoleHistory {
   protected synchronized void reset() throws BadConfigException {
 
     nodemap = new NodeMap(roleSize);
-    failedNodes = new HashSet<>();
     resetAvailableNodeLists();
     outstandingRequests = new OutstandingRequestTracker();
   }
@@ -578,7 +571,8 @@ public class RoleHistory {
       NodeInstance candidate = targets.get(i);
       if (candidate.getActiveRoleInstances(roleId) == 0) {
         // no active instances: check failure statistics
-        if (strictPlacement || !candidate.exceedsFailureThreshold(role)) {
+        if (strictPlacement
+            || (candidate.isOnline() && !candidate.exceedsFailureThreshold(role))) {
           targets.remove(i);
           // exit criteria for loop is now met
           nodeInstance = candidate;
@@ -779,6 +773,16 @@ public class RoleHistory {
   }
 
   /**
+   * Does the RoleHistory have enough information about the YARN cluster
+   * to start placing AA requests? That is: has it the node map and
+   * any label information needed?
+   * @return true if the caller can start requesting AA nodes
+   */
+  public boolean canPlaceAANodes() {
+    return nodeUpdateReceived.get();
+  }
+
+  /**
    * Get the last time the nodes were updated from YARN
    * @return the update time or zero if never updated.
    */
@@ -788,33 +792,35 @@ public class RoleHistory {
 
   /**
    * Update failedNodes and nodemap based on the node state
-   * 
+   *
    * @param updatedNodes list of updated nodes
+   * @return true if a review should be triggered.
    */
-  public synchronized void onNodesUpdated(List<NodeReport> updatedNodes) {
+  public synchronized boolean onNodesUpdated(List<NodeReport> updatedNodes) {
     log.debug("Updating {} nodes", updatedNodes.size());
     nodesUpdatedTime.set(now());
     nodeUpdateReceived.set(true);
+    int printed = 0;
+    boolean triggerReview = false;
     for (NodeReport updatedNode : updatedNodes) {
       String hostname = updatedNode.getNodeId() == null
-                        ? null 
-                        : updatedNode.getNodeId().getHost();
+          ? ""
+          : updatedNode.getNodeId().getHost();
       NodeState nodeState = updatedNode.getNodeState();
-      if (hostname == null || nodeState == null) {
+      if (hostname.isEmpty() || nodeState == null) {
+        log.warn("Ignoring incomplete update");
         continue;
       }
-      log.debug("host {} is in state {}", hostname, nodeState);
+      if (log.isDebugEnabled() && printed++ < 10) {
+        // log the first few, but avoid overloading the logs for a full cluster
+        // update
+        log.debug("Node \"{}\" is in state {}", hostname, nodeState);
+      }
       // update the node; this also creates an instance if needed
       boolean updated = nodemap.updateNode(hostname, updatedNode);
-      if (updated) {
-        if (nodeState.isUnusable()) {
-          log.info("Failed node {} state {}", hostname, nodeState);
-          failedNodes.add(hostname);
-        } else {
-          failedNodes.remove(hostname);
-        }
-      }
+      triggerReview |= updated;
     }
+    return triggerReview;
   }
 
   /**
@@ -852,7 +858,7 @@ public class RoleHistory {
 
   /**
    * Mark a container finished; if it was released then that is treated
-   * differently. history is touch()ed
+   * differently. history is {@code touch()}-ed
    *
    *
    * @param container completed container
@@ -917,9 +923,6 @@ public class RoleHistory {
     for (NodeInstance node : nodemap.values()) {
       log.info(node.toFullString());
     }
-
-    log.info("Failed nodes: {}",
-        SliderUtils.joinWithInnerSeparator(" ", failedNodes));
   }
 
   /**
@@ -963,17 +966,6 @@ public class RoleHistory {
   }
 
   /**
-   * Get a clone of the failedNodes
-   * 
-   * @return the list
-   */
-  public List<String> cloneFailedNodes() {
-    List<String> lst = new ArrayList<>();
-    lst.addAll(failedNodes);
-    return lst;
-  }
-
-  /**
    * Escalate operation as triggered by external timer.
    * @return a (usually empty) list of cancel/request operations.
    */
@@ -983,8 +975,8 @@ public class RoleHistory {
 
   /**
    * Build the list of requests to cancel from the outstanding list.
-   * @param role
-   * @param toCancel
+   * @param role role
+   * @param toCancel number to cancel
    * @return a list of cancellable operations.
    */
   public synchronized List<AbstractRMOperation> cancelRequestsForRole(RoleStatus role, int toCancel) {

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHostnamePair.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHostnamePair.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHostnamePair.java
new file mode 100644
index 0000000..920887a
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHostnamePair.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.state;
+
+import java.util.Objects;
+
+public class RoleHostnamePair {
+
+  /**
+   * requested role
+   */
+  public final int roleId;
+
+  /**
+   * hostname -will be null if node==null
+   */
+  public final String hostname;
+
+  public RoleHostnamePair(int roleId, String hostname) {
+    this.roleId = roleId;
+    this.hostname = hostname;
+  }
+
+  public int getRoleId() {
+    return roleId;
+  }
+
+  public String getHostname() {
+    return hostname;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof RoleHostnamePair)) {
+      return false;
+    }
+    RoleHostnamePair that = (RoleHostnamePair) o;
+    return Objects.equals(roleId, that.roleId) &&
+        Objects.equals(hostname, that.hostname);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(roleId, hostname);
+  }
+
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder(
+        "RoleHostnamePair{");
+    sb.append("roleId=").append(roleId);
+    sb.append(", hostname='").append(hostname).append('\'');
+    sb.append('}');
+    return sb.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
index 84f0eba..7ecc00c 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/SliderAMWebApp.java
@@ -92,7 +92,7 @@ public class SliderAMWebApp extends WebApp {
     String regex = "(?!/ws)";
     serveRegex(regex).with(SliderDefaultWrapperServlet.class); 
 
-    Map<String, String> params = new HashMap<String, String>();
+    Map<String, String> params = new HashMap<>();
     params.put(ResourceConfig.FEATURE_IMPLICIT_VIEWABLES, "true");
     params.put(ServletContainer.FEATURE_FILTER_FORWARD_ON_404, "true");
     params.put(ResourceConfig.FEATURE_XMLROOTELEMENT_PROCESSING, "true");

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
index 36b9d66..f99326f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
@@ -18,16 +18,106 @@
 
 package org.apache.slider.server.appmaster.model.history
 
-import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
+import groovy.util.logging.Slf4j
+import org.apache.hadoop.yarn.api.records.NodeReport
+import org.apache.hadoop.yarn.api.records.NodeState
+import org.apache.slider.server.appmaster.model.mock.MockNodeReport
+import org.apache.slider.server.appmaster.state.NodeEntry
+import org.apache.slider.server.appmaster.state.NodeInstance
+import org.apache.slider.server.appmaster.state.NodeMap
+import org.apache.slider.test.SliderTestBase
 import org.junit.Test
 
 /**
  * Test anti-affine
  */
-class TestRoleHistoryAA extends BaseMockAppStateTest {
+//@CompileStatic
+@Slf4j
+class TestRoleHistoryAA extends SliderTestBase {
+
+  List<String> hostnames = ["one", "two", "three"]
+  NodeMap nodeMap, gpuNodeMap
+
+  @Override
+  void setup() {
+    super.setup()
+    nodeMap = createNodeMap(hostnames, NodeState.RUNNING)
+    gpuNodeMap = createNodeMap(hostnames, NodeState.RUNNING, "GPU")
+
+  }
+
+  @Test
+  public void testFindNodesInFullCluster() throws Throwable {
+    // all three will surface at first
+    assertResultSize(3, nodeMap.findNodesForRole(1, ""))
+  }
+
+  @Test
+  public void testFindNodesInUnhealthyCluster() throws Throwable {
+    // node "one" is marked unhealthy, so only two will surface
+    nodeMap.get("one").updateNode(new MockNodeReport("one",NodeState.UNHEALTHY))
+    assertResultSize(2, nodeMap.findNodesForRole(1, ""))
+  }
+
+  @Test
+  public void testFindNoNodesWrongLabel() throws Throwable {
+    // no node in the unlabelled map matches the GPU label
+    assertResultSize(0, nodeMap.findNodesForRole(1, "GPU"))
+  }
+
+  @Test
+  public void testFindNoNodesRightLabel() throws Throwable {
+    // all three will surface at first
+    assertResultSize(3, gpuNodeMap.findNodesForRole(1, "GPU"))
+  }
 
   @Test
-  public void test() throws Throwable {
-    
+  public void testFindNoNodesNoLabel() throws Throwable {
+    // all three will surface at first
+    assertResultSize(3, gpuNodeMap.findNodesForRole(1, ""))
+  }
+
+  @Test
+  public void testFindNoNodesClusterRequested() throws Throwable {
+    // after request() is called on every node entry, none should surface
+    applyToNodeEntries(nodeMap) {
+      NodeEntry it -> it.request()
+    }
+    assertResultSize(0, nodeMap.findNodesForRole(1, ""))
+  }
+
+  @Test
+  public void testFindNoNodesClusterBusy() throws Throwable {
+    // after request() is called on every node entry, none should surface
+    applyToNodeEntries(nodeMap) {
+      NodeEntry it -> it.request()
+    }
+    assertResultSize(0, nodeMap.findNodesForRole(1, ""))
+  }
+
+  def assertResultSize(int size, List<NodeInstance> list) {
+    if (list.size() != size) {
+      list.each { log.error(it.toFullString())}
+    }
+    assert size == list.size()
+  }
+
+  def applyToNodeEntries(Collection<NodeInstance> list, Closure cl) {
+    list.each { it -> cl(it.getOrCreate(1)) }
+  }
+
+  def applyToNodeEntries(NodeMap nodeMap, Closure cl) {
+    applyToNodeEntries(nodeMap.values(), cl)
+  }
+
+  def NodeMap createNodeMap(List<NodeReport> nodeReports) {
+    NodeMap nodeMap = new NodeMap(1)
+    nodeMap.buildOrUpdate(nodeReports)
+    nodeMap
+  }
+
+  def NodeMap createNodeMap(List<String> hosts, NodeState state,
+      String label = "") {
+    createNodeMap(MockNodeReport.createInstances(hosts, state, label))
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
index d9cfddb..c8a82bd 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryContainerEvents.groovy
@@ -415,7 +415,7 @@ class TestRoleHistoryContainerEvents extends BaseMockAppStateTest {
     // as even unused nodes are added to the list, we expect the map size to be >1
     assert startSize <= endSize
     assert nodemap[hostname] != null
-    assert roleHistory.cloneFailedNodes().contains(hostname)
+    assert !nodemap[hostname].online
 
     // add a failure of a node we've never head of
     def newhost = "newhost"
@@ -428,9 +428,8 @@ class TestRoleHistoryContainerEvents extends BaseMockAppStateTest {
     roleHistory.onNodesUpdated(nodesUpdated)
 
     def nodemap2 = roleHistory.cloneNodemap()
-    assert nodemap2.size() > endSize
-    assert roleHistory.cloneFailedNodes().contains(newhost)
     assert nodemap2[newhost]
+    assert !nodemap2[newhost].online
 
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/2606192a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy
index 1c7a816..43eef3e 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockNodeReport.groovy
@@ -18,6 +18,7 @@
 
 package org.apache.slider.server.appmaster.model.mock
 
+import groovy.transform.CompileStatic
 import org.apache.hadoop.yarn.api.records.NodeId
 import org.apache.hadoop.yarn.api.records.NodeReport
 import org.apache.hadoop.yarn.api.records.NodeState
@@ -26,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.Resource
 /**
  * Node report for testing
  */
+@CompileStatic
 class MockNodeReport extends NodeReport {
   NodeId nodeId;
   NodeState nodeState;
@@ -37,4 +39,36 @@ class MockNodeReport extends NodeReport {
   String healthReport;
   long lastHealthReportTime;
   Set<String> nodeLabels;
+
+  MockNodeReport() {
+  }
+
+  /**
+   * Create a single instance
+   * @param hostname hostname used to build the node ID
+   * @param nodeState state of the node
+   * @param label label added to the node's label set
+   */
+  MockNodeReport(String hostname, NodeState nodeState, String label ="") {
+    nodeId = NodeId.newInstance(hostname, 80)
+    this.nodeState = nodeState
+    this.httpAddress = "http$hostname:80"
+    this.nodeLabels = new HashSet<>()
+    nodeLabels.add(label)
+  }
+
+  /**
+   * Create a list of instances -one for each hostname
+   * @param hostnames hosts
+   * @param nodeState state of all of them
+   * @param label label for all of them
+   * @return a list of node reports, one per hostname
+   */
+  static List<MockNodeReport> createInstances(
+      List<String> hostnames,
+      NodeState nodeState = NodeState.RUNNING,
+      String label = "") {
+    hostnames.collect { String  name ->
+      new MockNodeReport(name, nodeState, label)}
+  }
 }


[09/12] incubator-slider git commit: SLIDER-982 chaos monkey to not log @ info if its not actually active

Posted by st...@apache.org.
SLIDER-982 chaos monkey to not log @ info if its not actually active


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/ca6f9cee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/ca6f9cee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/ca6f9cee

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: ca6f9cee09b4b6c7b6dee83bdd06268e8ddd4d50
Parents: c4e7329
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 13:58:50 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 14:01:31 2015 +0000

----------------------------------------------------------------------
 .../slider/server/appmaster/SliderAppMaster.java    |  2 +-
 .../server/appmaster/monkey/ChaosKillContainer.java | 16 +++++++---------
 2 files changed, 8 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ca6f9cee/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index df91d7f..3d062b5 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -2206,7 +2206,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
         internals.getOptionBool(InternalKeys.CHAOS_MONKEY_ENABLED,
             InternalKeys.DEFAULT_CHAOS_MONKEY_ENABLED);
     if (!enabled) {
-      log.info("Chaos monkey disabled");
+      log.debug("Chaos monkey disabled");
       return false;
     }
     

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ca6f9cee/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
index a5cdfc6..ae38e4c 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/monkey/ChaosKillContainer.java
@@ -72,15 +72,13 @@ public class ChaosKillContainer implements ChaosTarget {
       }
     }
     int size = liveContainers.size();
-    if (size <= 0) {
-      log.info("No containers to kill");
-      return;
-    }
-    int target = random.nextInt(size);
-    RoleInstance roleInstance = liveContainers.get(target);
-    log.info("Killing {}", roleInstance);
+    if (size > 0) {
+      int target = random.nextInt(size);
+      RoleInstance roleInstance = liveContainers.get(target);
+      log.info("Killing {}", roleInstance);
 
-    queues.schedule(new ActionKillContainer(roleInstance.getId(),
-        DELAY, TimeUnit.MILLISECONDS, operationHandler));
+      queues.schedule(new ActionKillContainer(roleInstance.getId(),
+          DELAY, TimeUnit.MILLISECONDS, operationHandler));
+    }
   }
 }


[03/12] incubator-slider git commit: SLIDER-960 switch to groovy 2.4.4 (the ASF edition), with the faster java7 build

Posted by st...@apache.org.
SLIDER-960 switch to groovy 2.4.4 (the ASF edition), with the faster java7 build


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/618782a9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/618782a9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/618782a9

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 618782a9b66e0ae35e79d07888fec45d19ccb421
Parents: fed5d03
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 13:57:14 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 13:57:14 2015 +0000

----------------------------------------------------------------------
 pom.xml             | 7 +++++++
 slider-core/pom.xml | 1 +
 2 files changed, 8 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/618782a9/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3943ea9..866c448 100644
--- a/pom.xml
+++ b/pom.xml
@@ -511,6 +511,13 @@
       </dependency>
 
       <dependency>
+        <groupId>org.codehaus.groovy</groupId>
+        <artifactId>groovy-all</artifactId>
+        <classifier>indy</classifier>
+        <version>${groovy.version}</version>
+      </dependency>
+
+      <dependency>
         <groupId>com.beust</groupId>
         <artifactId>jcommander</artifactId>
         <version>${jcommander.version}</version>

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/618782a9/slider-core/pom.xml
----------------------------------------------------------------------
diff --git a/slider-core/pom.xml b/slider-core/pom.xml
index 704fa06..c7e8933 100644
--- a/slider-core/pom.xml
+++ b/slider-core/pom.xml
@@ -225,6 +225,7 @@
     <dependency>
       <groupId>org.codehaus.groovy</groupId>
       <artifactId>groovy-all</artifactId>
+      <classifier>indy</classifier>
       <scope>test</scope>
     </dependency>
 


[12/12] incubator-slider git commit: SLIDER-967 AA placement with nodemap updates working

Posted by st...@apache.org.
SLIDER-967 AA placement with nodemap updates working


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/5a61b4cd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/5a61b4cd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/5a61b4cd

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 5a61b4cd8189ae02eb9eaeb8ffdb25604dcc4376
Parents: 6b13042
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 18:15:07 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 18:15:07 2015 +0000

----------------------------------------------------------------------
 .../server/appmaster/SliderAppMaster.java       | 13 ++-
 .../slider/server/appmaster/state/AppState.java | 51 ++++++++++--
 .../state/OutstandingRequestTracker.java        |  8 +-
 .../server/appmaster/state/RoleHistory.java     |  5 --
 .../server/appmaster/state/RoleStatus.java      | 55 ++++++++-----
 .../appstate/TestMockAppStateAAPlacement.groovy | 85 +++++++++++++++++---
 .../model/history/TestRoleHistoryAA.groovy      |  4 -
 .../model/mock/BaseMockAppStateTest.groovy      | 21 ++++-
 8 files changed, 183 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index b54ea6c..eb7b26a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -1851,10 +1851,15 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
     LOG_YARN.info("onNodesUpdated({})", updatedNodes.size());
     log.info("Updated nodes {}", updatedNodes);
     // Check if any nodes are lost or revived and update state accordingly
-    List<AbstractRMOperation> operations = appState.onNodesUpdated(updatedNodes);
-    execute(operations);
-    // if there were any operations, trigger a review
-    reviewRequestAndReleaseNodes("nodes updated");
+
+    AppState.NodeUpdatedOutcome outcome = appState.onNodesUpdated(updatedNodes);
+    if (!outcome.operations.isEmpty()) {
+      execute(outcome.operations);
+    }
+    // trigger a review if the cluster changed
+    if (outcome.clusterChanged) {
+      reviewRequestAndReleaseNodes("nodes updated");
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index 0c66e25..6f38eb5 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -1222,11 +1222,11 @@ public class AppState {
    * @return the container request to submit or null if there is none
    */
   private AMRMClient.ContainerRequest createContainerRequest(RoleStatus role) {
-    incrementRequestCount(role);
     OutstandingRequest request = roleHistory.requestContainerForRole(role);
     if (request == null) {
       return null;
     }
+    incrementRequestCount(role);
     if (role.isAntiAffinePlacement()) {
       role.setOutstandingAArequest(request);
     }
@@ -1428,16 +1428,31 @@ public class AppState {
    * Handle node update from the RM. This syncs up the node map with the RM's view
    * @param updatedNodes updated nodes
    */
-  public synchronized List<AbstractRMOperation> onNodesUpdated(List<NodeReport> updatedNodes) {
+  public synchronized NodeUpdatedOutcome onNodesUpdated(List<NodeReport> updatedNodes) {
     boolean changed = roleHistory.onNodesUpdated(updatedNodes);
     if (changed) {
-      log.error("TODO: cancel AA requests and re-review");
-      return cancelOutstandingAARequests();
+      log.info("YARN cluster changed —cancelling current AA requests");
+      List<AbstractRMOperation> operations = cancelOutstandingAARequests();
+      log.debug("Created {} cancel requests", operations.size());
+      return new NodeUpdatedOutcome(true, operations);
     }
-    return new ArrayList<>(0);
+    return new NodeUpdatedOutcome(false, new ArrayList<AbstractRMOperation>(0));
   }
 
   /**
+   * Return value of the {@link #onNodesUpdated(List)} call.
+   */
+  public static class NodeUpdatedOutcome {
+    public final boolean clusterChanged;
+    public final List<AbstractRMOperation> operations;
+
+    public NodeUpdatedOutcome(boolean clusterChanged,
+        List<AbstractRMOperation> operations) {
+      this.clusterChanged = clusterChanged;
+      this.operations = operations;
+    }
+  }
+  /**
    * Is a role short lived by the threshold set for this application
    * @param instance instance
    * @return true if the instance is considered short lived
@@ -1885,13 +1900,17 @@ public class AppState {
   }
 
   /**
-   * Escalate operation as triggered by external timer.
+   * Cancel any outstanding AA Requests, building up the list of ops to
+   * cancel, removing them from RoleHistory structures and the RoleStatus
+   * entries.
    * @return a (usually empty) list of cancel/request operations.
    */
   public synchronized List<AbstractRMOperation> cancelOutstandingAARequests() {
+    // get the list of cancel operations
     List<AbstractRMOperation> operations = roleHistory.cancelOutstandingAARequests();
     for (RoleStatus roleStatus : roleStatusMap.values()) {
-      if (roleStatus.isAntiAffinePlacement()) {
+      if (roleStatus.isAARequestOutstanding()) {
+        log.info("Cancelling outstanding AA request for {}", roleStatus);
         roleStatus.cancelOutstandingAARequest();
       }
     }
@@ -2225,6 +2244,9 @@ public class AppState {
             log.info("Asking for next container for AA role {}", roleName);
             role.decPendingAntiAffineRequests();
             addContainerRequest(operations, createContainerRequest(role));
+            log.debug("Current AA role status {}", role);
+          } else {
+            log.info("AA request sequence completed for role {}", role);
           }
         }
 
@@ -2310,4 +2332,19 @@ public class AppState {
     // now pretend it has just started
     innerOnNodeManagerContainerStarted(cid);
   }
+
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder("AppState{");
+    sb.append("applicationLive=").append(applicationLive);
+    sb.append(", live nodes=").append(liveNodes.size());
+    sb.append(", startedContainers=").append(startedContainers);
+    sb.append(", startFailedContainerCount=").append(startFailedContainerCount);
+    sb.append(", surplusContainers=").append(surplusContainers);
+    sb.append(", failedContainerCount=").append(failedContainerCount);
+    sb.append(", outstandingContainerRequests=")
+        .append(outstandingContainerRequests);
+    sb.append('}');
+    return sb.toString();
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
index 4209449..66d201f 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
@@ -390,6 +390,7 @@ public class OutstandingRequestTracker {
   @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
   public synchronized List<AbstractRMOperation> cancelOutstandingAARequests() {
 
+    log.debug("Looking for AA request to cancel");
     List<AbstractRMOperation> operations = new ArrayList<>();
 
     // first, all placed requests
@@ -404,15 +405,18 @@ public class OutstandingRequestTracker {
       }
     }
     // second, all open requests
-    for (OutstandingRequest outstandingRequest : openRequests) {
+    ListIterator<OutstandingRequest> orit = openRequests.listIterator();
+    while (orit.hasNext()) {
+      OutstandingRequest outstandingRequest =  orit.next();
       synchronized (outstandingRequest) {
         if (outstandingRequest.isAntiAffine()) {
           // time to escalate
           operations.add(outstandingRequest.createCancelOperation());
-          openRequests.remove(outstandingRequest);
+          orit.remove();
         }
       }
     }
+    log.info("Cancelling {} outstanding AA requests", operations.size());
 
     return operations;
   }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index d7e6050..00b5226 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -1030,11 +1030,6 @@ public class RoleHistory {
     List<OutstandingRequest> requests =
         outstandingRequests.extractOpenRequestsForRole(roleId, toCancel);
 
-    if (role.isAntiAffinePlacement()) {
-      // TODO: AA
-      // AA placement, so clear the role info
-      role.cancelOutstandingAARequest();
-    }
     // are there any left?
     int remaining = toCancel - requests.size();
     // ask for some placed nodes

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
index a14a84b..b530d18 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
@@ -171,7 +171,7 @@ public final class RoleStatus implements Cloneable {
   public void cancel(long count) {
     requested.decToFloor(count);
   }
-  
+
   public void decRequested() {
     cancel(1);
   }
@@ -334,8 +334,11 @@ public final class RoleStatus implements Cloneable {
    * if there are no outstanding requests.
    */
   public void cancelOutstandingAARequest() {
-    setOutstandingAArequest(null);
-    setPendingAntiAffineRequests(0);
+    if (outstandingAArequest != null) {
+      setOutstandingAArequest(null);
+      setPendingAntiAffineRequests(0);
+      decRequested();
+    }
   }
 
   /**
@@ -366,25 +369,33 @@ public final class RoleStatus implements Cloneable {
   }
 
   @Override
-  public synchronized String toString() {
-    return "RoleStatus{" +
-           "name='" + name + '\'' +
-           ", key=" + key +
-           ", desired=" + desired +
-           ", actual=" + actual +
-           ", requested=" + requested +
-           ", releasing=" + releasing +
-           ", pendingAntiAffineRequests=" + pendingAntiAffineRequests +
-           ", failed=" + failed +
-           ", failed recently=" + failedRecently.get() +
-           ", node failed=" + nodeFailed.get() +
-           ", pre-empted=" + preempted.get() +
-           ", started=" + started +
-           ", startFailed=" + startFailed +
-           ", completed=" + completed +
-           ", failureMessage='" + failureMessage + '\'' +
-           ", providerRole=" + providerRole +
-           '}';
+  public String toString() {
+    final StringBuilder sb = new StringBuilder("RoleStatus{");
+    sb.append("name='").append(name).append('\'');
+    sb.append(", key=").append(key);
+    sb.append(", desired=").append(desired);
+    sb.append(", actual=").append(actual);
+    sb.append(", requested=").append(requested);
+    sb.append(", releasing=").append(releasing);
+    sb.append(", failed=").append(failed);
+    sb.append(", startFailed=").append(startFailed);
+    sb.append(", started=").append(started);
+    sb.append(", completed=").append(completed);
+    sb.append(", totalRequested=").append(totalRequested);
+    sb.append(", preempted=").append(preempted);
+    sb.append(", nodeFailed=").append(nodeFailed);
+    sb.append(", failedRecently=").append(failedRecently);
+    sb.append(", limitsExceeded=").append(limitsExceeded);
+    sb.append(", resourceRequirements=").append(resourceRequirements);
+    sb.append(", isAntiAffinePlacement=").append(isAntiAffinePlacement());
+    if (isAntiAffinePlacement()) {
+      sb.append(", pendingAntiAffineRequests=").append(pendingAntiAffineRequests);
+      sb.append(", outstandingAArequest=").append(outstandingAArequest);
+    }
+    sb.append(", failureMessage='").append(failureMessage).append('\'');
+    sb.append(", providerRole=").append(providerRole);
+    sb.append('}');
+    return sb.toString();
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
index c98f3bf..9a325d7 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
@@ -21,13 +21,18 @@ package org.apache.slider.server.appmaster.model.appstate
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
 import org.apache.hadoop.yarn.api.records.Container
+import org.apache.hadoop.yarn.api.records.NodeReport
+import org.apache.hadoop.yarn.api.records.NodeState
 import org.apache.hadoop.yarn.client.api.AMRMClient
 import org.apache.slider.providers.PlacementPolicy
 import org.apache.slider.providers.ProviderRole
 import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
 import org.apache.slider.server.appmaster.model.mock.MockFactory
+import org.apache.slider.server.appmaster.model.mock.MockNodeReport
 import org.apache.slider.server.appmaster.model.mock.MockRoles
+import org.apache.slider.server.appmaster.model.mock.MockYarnEngine
 import org.apache.slider.server.appmaster.operations.AbstractRMOperation
+import org.apache.slider.server.appmaster.state.AppState
 import org.apache.slider.server.appmaster.state.AppStateBindingInfo
 import org.apache.slider.server.appmaster.state.ContainerAssignment
 import org.apache.slider.server.appmaster.state.NodeMap
@@ -55,6 +60,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
       null)
 
   RoleStatus aaRole
+  private int NODES = 3
 
   @Override
   AppStateBindingInfo buildBindingInfo() {
@@ -73,6 +79,11 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     aaRole = lookupRole(AAROLE.name)
   }
 
+  @Override
+  MockYarnEngine createYarnEngine() {
+    new MockYarnEngine(NODES, 8)
+  }
+
   /**
    * Get the single request of a list of operations; includes the check for the size
    * @param ops operations list of size 1
@@ -87,7 +98,6 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
   public void testAllocateAANoLabel() throws Throwable {
     assert cloneNodemap().size() > 0
 
-
     // want multiple instances, so there will be iterations
     aaRole.desired = 2
 
@@ -111,7 +121,6 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     assert !hostInstance.canHost(aaRole.key, "")
     assert !hostInstance.canHost(aaRole.key, null)
 
-
     // assignment
     assert assignments.size() == 1
 
@@ -205,7 +214,6 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     submitOperations(ops, [], ops2).size()
     assert 1 == ops2.size()
     assertAllContainersAA()
-
   }
 
   /**
@@ -241,17 +249,70 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
   }
 
   /**
-   * Scan through all containers and assert that the assignment is AA
-   * @param index role index
+   * Ask for more AA instances than there are nodes; the surplus one stays unsatisfied.
+   * @throws Throwable
    */
-  void assertAllContainersAA(String index) {
-    def nodemap = stateAccess.nodeInformationSnapshot
-    nodemap.each { name, info ->
-      def nodeEntry = info.entries[index]
-      assert nodeEntry == null ||
-             (nodeEntry.live -nodeEntry.releasing + nodeEntry.starting)  <= 1 ,
-      "too many instances on node $name"
+  @Test
+  public void testAskForTooMany() throws Throwable {
+
+    describe("Ask for 1 more than the no of available nodes;" +
+             " expect the final request to be unsatisfied until the cluster changes size")
+    // ask for one more instance than the cluster has nodes
+    aaRole.desired = NODES + 1
+    List<AbstractRMOperation > operations = appState.reviewRequestAndReleaseNodes()
+    assert aaRole.AARequestOutstanding
+    assert NODES == aaRole.pendingAntiAffineRequests
+    for (int i = 0; i < NODES; i++) {
+      def iter = "Iteration $i role = $aaRole"
+      log.info(iter)
+      List<AbstractRMOperation > operationsOut = []
+      assert 1 == submitOperations(operations, [], operationsOut).size(), iter
+      operations = operationsOut
+      if (i + 1 < NODES) {
+        assert operations.size() == 2
+      } else {
+        assert operations.size() == 1
+      }
+      assertAllContainersAA()
     }
+    // expect an outstanding AA request to be unsatisfied
+    assert aaRole.actual < aaRole.desired
+    assert !aaRole.requested
+    assert !aaRole.AARequestOutstanding
+    List<Container> allocatedContainers = engine.execute(operations, [])
+    assert 0 == allocatedContainers.size()
+    // in a review now, no more requests can be generated, as there is no space for AA placements,
+    // even though there is cluster capacity
+    assert 0 == appState.reviewRequestAndReleaseNodes().size()
+
+    // now do a node update (this doesn't touch the YARN engine; the node isn't really there)
+    def outcome = addNewNode()
+    assert cloneNodemap().size() == NODES + 1
+    assert outcome.clusterChanged
+    // no AA request is outstanding, so there is nothing to cancel
+    assert outcome.operations.empty
+    assert 1 == appState.reviewRequestAndReleaseNodes().size()
+  }
+
+  protected AppState.NodeUpdatedOutcome addNewNode() {
+    NodeReport report = new MockNodeReport("four", NodeState.RUNNING) as NodeReport
+    appState.onNodesUpdated([report])
   }
 
+  @Test
+  public void testClusterSizeChangesDuringRequestSequence() throws Throwable {
+    describe("Change the cluster size where the cluster size changes during a test sequence.")
+    aaRole.desired = NODES + 1
+    List<AbstractRMOperation> operations = appState.reviewRequestAndReleaseNodes()
+    assert aaRole.AARequestOutstanding
+    assert NODES == aaRole.pendingAntiAffineRequests
+    def outcome = addNewNode()
+    assert outcome.clusterChanged
+    // one call to cancel
+    assert 1 == outcome.operations.size()
+    // and on a review, one more to rebuild
+    assert 1 == appState.reviewRequestAndReleaseNodes().size()
+  }
+
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
index 9d0efa2..de85bba 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
@@ -44,9 +44,6 @@ class TestRoleHistoryAA extends SliderTestBase {
   NodeMap nodeMap, gpuNodeMap
   RoleHistory roleHistory = new MockRoleHistory(MockFactory.ROLES)
 
-  AMRMClient.ContainerRequest requestContainer(RoleStatus roleStatus) {
-    roleHistory.requestContainerForRole(roleStatus).issuedRequest
-  }
 
   @Override
   void setup() {
@@ -159,7 +156,6 @@ class TestRoleHistoryAA extends SliderTestBase {
     assert node1.canHost(2,"")
   }
 
-
   public List<NodeInstance> assertNoAvailableNodes(int role = 1, String label = "") {
     return verifyResultSize(0, nodeMap.findAllNodesForRole(role, label))
   }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/5a61b4cd/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
index 3d472f1..4cb441d 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
@@ -279,7 +279,7 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
    * @return a list of roles
    */
   public List<RoleInstance> createAndSubmitNodes() {
-    return createAndSubmitNodes([])
+    return createAndSubmitNodes([], [])
   }
 
   /**
@@ -288,9 +288,10 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
    * @return a list of roles allocated
    */
   public List<RoleInstance> createAndSubmitNodes(
-      List<ContainerId> containerIds) {
+      List<ContainerId> containerIds,
+      List<AbstractRMOperation> operationsOut = []) {
     List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
-    return submitOperations(ops, containerIds)
+    return submitOperations(ops, containerIds, operationsOut)
   }
 
   /**
@@ -398,4 +399,18 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
     assert 1 == ops.size()
     getRequest(ops, 0)
   }
+
+  /**
+   * Scan through all containers and assert that the assignment is AA
+   * @param index role index
+   */
+  void assertAllContainersAA(String index) {
+    def nodemap = stateAccess.nodeInformationSnapshot
+    nodemap.each { name, info ->
+      def nodeEntry = info.entries[index]
+      assert nodeEntry == null ||
+             (nodeEntry.live - nodeEntry.releasing + nodeEntry.starting) <= 1,
+          "too many instances on node $name"
+    }
+  }
 }


[04/12] incubator-slider git commit: javadoc tweak on createZookeepernode

Posted by st...@apache.org.
javadoc tweak on createZookeepernode


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/c4e73291
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/c4e73291
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/c4e73291

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: c4e7329152283cb3183fe7de859382daf6aad519
Parents: 618782a
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 13:57:37 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 13:57:37 2015 +0000

----------------------------------------------------------------------
 .../main/java/org/apache/slider/client/SliderClient.java  | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/c4e73291/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
index eeda1e3..59e6848 100644
--- a/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
+++ b/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
@@ -503,6 +503,13 @@ public class SliderClient extends AbstractSliderLaunchedService implements RunSe
 
   /**
    * Create the zookeeper node associated with the calling user and the cluster
+   *
+   * @param clusterName slider application name
+   * @param nameOnly should the name only be created (i.e. don't create ZK node)
+   * @return the path, using the policy implemented in
+   *   {@link ZKIntegration#mkClusterPath(String, String)}
+   * @throws YarnException
+   * @throws IOException
    */
   @VisibleForTesting
   public String createZookeeperNode(String clusterName, Boolean nameOnly) throws YarnException, IOException {
@@ -522,7 +529,8 @@ public class SliderClient extends AbstractSliderLaunchedService implements RunSe
    * -throwing exceptions on any failure
    * @param clusterName cluster name
    * @param nameOnly create the path, not the node
-   * @return the path, with the node created
+   * @return the path, using the policy implemented in
+   *   {@link ZKIntegration#mkClusterPath(String, String)}
    * @throws YarnException
    * @throws IOException
    * @throws KeeperException


[07/12] incubator-slider git commit: SLIDER-967 tests on rolehistory & nodemap. low-level tests work, but the app state ones aren't tagging requested nodes as unavailable

Posted by st...@apache.org.
SLIDER-967 tests on rolehistory & nodemap. low-level tests work, but the app state ones aren't tagging requested nodes as unavailable


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/a7ba72e0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/a7ba72e0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/a7ba72e0

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: a7ba72e0ede14b70806b44d61d81bc4357052dbc
Parents: 7899f59
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 12:03:11 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 12:03:11 2015 +0000

----------------------------------------------------------------------
 .../slider/api/types/NodeEntryInformation.java  | 18 +++++
 .../server/appmaster/state/NodeInstance.java    | 16 +++-
 .../slider/server/appmaster/state/NodeMap.java  |  1 +
 .../server/appmaster/state/RoleHistory.java     |  2 +-
 .../appstate/TestMockAppStateAAPlacement.groovy | 18 ++++-
 .../TestMockAppStateContainerFailure.groovy     |  2 +-
 .../model/history/TestRoleHistoryAA.groovy      | 81 +++++++++++++++++---
 .../model/mock/BaseMockAppStateTest.groovy      |  4 +-
 8 files changed, 125 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java b/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java
index 482b0c7..15b57b0 100644
--- a/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java
+++ b/slider-core/src/main/java/org/apache/slider/api/types/NodeEntryInformation.java
@@ -58,4 +58,22 @@ public class NodeEntryInformation {
 
   /** number of starting instances */
   public int starting;
+
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder(
+        "NodeEntryInformation{");
+    sb.append("priority=").append(priority);
+    sb.append(", live=").append(live);
+    sb.append(", requested=").append(requested);
+    sb.append(", releasing=").append(releasing);
+    sb.append(", starting=").append(starting);
+    sb.append(", failed=").append(failed);
+    sb.append(", failedRecently=").append(failedRecently);
+    sb.append(", startFailed=").append(startFailed);
+    sb.append(", preempted=").append(preempted);
+    sb.append(", lastUsed=").append(lastUsed);
+    sb.append('}');
+    return sb.toString();
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
index ebd9d5a..8110bff 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
@@ -205,7 +205,6 @@ public class NodeInstance {
     nodeEntries.add(nodeEntry);
   }
 
-  
   /**
    * run through each entry; gc'ing & removing old ones that don't have
    * a recent failure count (we care about those)
@@ -381,5 +380,20 @@ public class NodeInstance {
       return activeRight - activeLeft;
     }
   }
+  /**
+   * A comparator for sorting entries alphabetically
+   */
+  public static class CompareNames implements Comparator<NodeInstance>,
+                                           Serializable {
+
+    public CompareNames() {
+    }
+
+    @Override
+    public int compare(NodeInstance left, NodeInstance right) {
+      return left.hostname.compareTo(right.hostname);
+    }
+  }
+
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
index aea48b3..23411ca 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
@@ -155,6 +155,7 @@ public class NodeMap extends HashMap<String, NodeInstance> {
         nodes.add(instance);
       }
     }
+    Collections.sort(nodes, new NodeInstance.CompareNames());
     return nodes;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index 2ca5367..8a840fc 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -512,7 +512,7 @@ public class RoleHistory {
   }
 
   /**
-   * Get a possibly emtpy list of suggested nodes for a role.
+   * Get a possibly empty list of suggested nodes for a role.
    * @param id role ID
    * @return list
    */

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
index c7f59e3..64c0362 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
@@ -30,6 +30,7 @@ import org.apache.slider.server.appmaster.model.mock.MockRoles
 import org.apache.slider.server.appmaster.operations.AbstractRMOperation
 import org.apache.slider.server.appmaster.state.AppStateBindingInfo
 import org.apache.slider.server.appmaster.state.ContainerAssignment
+import org.apache.slider.server.appmaster.state.NodeInstance
 import org.apache.slider.server.appmaster.state.RoleInstance
 import org.apache.slider.server.appmaster.state.RoleStatus
 import org.junit.Test
@@ -105,6 +106,13 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     List<AbstractRMOperation> operations = []
     appState.onContainersAllocated([allocated], assignments, operations)
 
+    def host = allocated.nodeId.host
+    def hostInstance = nodemap.get(host)
+    assert hostInstance.get(aaRole.key).starting == 1
+    assert !hostInstance.canHost(aaRole.key, "")
+    assert !hostInstance.canHost(aaRole.key, null)
+
+
     // assignment
     assert assignments.size() == 1
 
@@ -115,6 +123,10 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     // we also expect a new allocation request to have been issued
 
     def req2 = getRequest(operations, 1)
+    assert req2.nodes.size() == engine.cluster.clusterSize - 1
+
+    assert !req2.nodes.contains(host)
+    assert !request.relaxLocality
 
     // verify the pending couner is down
     assert 0L == aaRole.pendingAntiAffineRequests
@@ -149,11 +161,13 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
     assert ops2.empty
     assert aaRole.pendingAntiAffineRequests == 2
+    assertAllContainersAA()
 
     // next iter
     assert 1 == submitOperations(ops, [], ops2).size()
     assert 2 == ops2.size()
     assert aaRole.pendingAntiAffineRequests == 1
+    assertAllContainersAA()
 
     assert 0 == appState.reviewRequestAndReleaseNodes().size()
     // now trigger the next execution cycle
@@ -187,6 +201,8 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     // next iter
     submitOperations(ops, [], ops2).size()
     assert 1 == ops2.size()
+    assertAllContainersAA()
+
   }
 
   /**
@@ -230,7 +246,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     nodemap.each { name, info ->
       def nodeEntry = info.entries[index]
       assert nodeEntry == null ||
-             (nodeEntry.live + nodeEntry.starting + nodeEntry.releasing)  <= 1 ,
+             (nodeEntry.live -nodeEntry.releasing + nodeEntry.starting)  <= 1 ,
       "too many instances on node $name"
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
index 5b24a59..3235827 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateContainerFailure.groovy
@@ -58,7 +58,7 @@ class TestMockAppStateContainerFailure extends BaseMockAppStateTest
    */
   @Override
   MockYarnEngine createYarnEngine() {
-    return new MockYarnEngine(8000, 4)
+    return new MockYarnEngine(4, 8000)
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
index fdbc3b4..9d0efa2 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
@@ -58,32 +58,40 @@ class TestRoleHistoryAA extends SliderTestBase {
   @Test
   public void testFindNodesInFullCluster() throws Throwable {
     // all three will surface at first
-    assertResultSize(3, nodeMap.findAllNodesForRole(1, ""))
+    verifyResultSize(3, nodeMap.findAllNodesForRole(1, ""))
   }
 
   @Test
   public void testFindNodesInUnhealthyCluster() throws Throwable {
     // all three will surface at first
-    nodeMap.get("one").updateNode(new MockNodeReport("one",NodeState.UNHEALTHY))
-    assertResultSize(2, nodeMap.findAllNodesForRole(1, ""))
+    markNodeOneUnhealthy()
+    verifyResultSize(2, nodeMap.findAllNodesForRole(1, ""))
+  }
+
+  public boolean markNodeOneUnhealthy() {
+    return setNodeState(nodeMap.get("one"), NodeState.UNHEALTHY)
+  }
+
+  protected boolean setNodeState(NodeInstance node, NodeState state) {
+    node.updateNode(new  MockNodeReport(node.hostname, state))
   }
 
   @Test
   public void testFindNoNodesWrongLabel() throws Throwable {
     // all three will surface at first
-    assertResultSize(0, nodeMap.findAllNodesForRole(1, "GPU"))
+    verifyResultSize(0, nodeMap.findAllNodesForRole(1, "GPU"))
   }
 
   @Test
   public void testFindNoNodesRightLabel() throws Throwable {
     // all three will surface at first
-    assertResultSize(3, gpuNodeMap.findAllNodesForRole(1, "GPU"))
+    verifyResultSize(3, gpuNodeMap.findAllNodesForRole(1, "GPU"))
   }
 
   @Test
   public void testFindNoNodesNoLabel() throws Throwable {
     // all three will surface at first
-    assertResultSize(3, gpuNodeMap.findAllNodesForRole(1, ""))
+    verifyResultSize(3, gpuNodeMap.findAllNodesForRole(1, ""))
   }
 
   @Test
@@ -92,7 +100,7 @@ class TestRoleHistoryAA extends SliderTestBase {
     applyToNodeEntries(nodeMap) {
       NodeEntry it -> it.request()
     }
-    assertResultSize(0, nodeMap.findAllNodesForRole(1, ""))
+    assertNoAvailableNodes(1)
   }
 
   @Test
@@ -101,14 +109,67 @@ class TestRoleHistoryAA extends SliderTestBase {
     applyToNodeEntries(nodeMap) {
       NodeEntry it -> it.request()
     }
-    assertResultSize(0, nodeMap.findAllNodesForRole(1, ""))
+    assertNoAvailableNodes(1)
+  }
+
+  /**
+   * Tag all nodes as starting, then walk one through a bit
+   * more of its lifecycle
+   */
+  @Test
+  public void testFindNoNodesLifecycle() throws Throwable {
+    // tag every node as starting: none should then be available
+    applyToNodeEntries(nodeMap) {
+      NodeEntry it -> it.onStarting()
+    }
+    assertNoAvailableNodes(1)
+
+    // walk one of the nodes through the lifecycle
+    def node1 = nodeMap.get("one")
+    assert !node1.canHost(1,"")
+    node1.get(1).onStartCompleted()
+    assert !node1.canHost(1,"")
+    assertNoAvailableNodes()
+    node1.get(1).release()
+    assert node1.canHost(1,"")
+    def list2 = verifyResultSize(1, nodeMap.findAllNodesForRole(1, ""))
+    assert list2[0].hostname == "one"
+
+    // now tag that node as unhealthy and expect it to go away
+    markNodeOneUnhealthy()
+    assertNoAvailableNodes()
+  }
+
+  @Test
+  public void testRolesIndependent() throws Throwable {
+    def node1 = nodeMap.get("one")
+    def role1 = node1.getOrCreate(1)
+    def role2 = node1.getOrCreate(2)
+    nodeMap.values().each {
+      it.updateNode(new MockNodeReport("", NodeState.UNHEALTHY))
+    }
+    assertNoAvailableNodes(1)
+    assertNoAvailableNodes(2)
+    assert setNodeState(node1, NodeState.RUNNING)
+    // tag role 1 as busy
+    role1.onStarting()
+    assertNoAvailableNodes(1)
+
+    verifyResultSize(1, nodeMap.findAllNodesForRole(2, ""))
+    assert node1.canHost(2,"")
+  }
+
+
+  public List<NodeInstance> assertNoAvailableNodes(int role = 1, String label = "") {
+    return verifyResultSize(0, nodeMap.findAllNodesForRole(role, label))
   }
 
-  def assertResultSize(int size, List<NodeInstance> list) {
+  List<NodeInstance> verifyResultSize(int size, List<NodeInstance> list) {
     if (list.size() != size) {
-      list.each { log.error(it.toFullString())}
+      list.each { log.error(it.toFullString()) }
     }
     assert size == list.size()
+    list
   }
 
   def applyToNodeEntries(Collection<NodeInstance> list, Closure cl) {

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/a7ba72e0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
index e1660ee..3d472f1 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
@@ -69,10 +69,9 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
    * @return
    */
   public MockYarnEngine createYarnEngine() {
-    return new MockYarnEngine(64, 1)
+    return new MockYarnEngine(8, 8)
   }
 
-
   @Override
   void setup() {
     super.setup()
@@ -89,7 +88,6 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
    */
   void initApp(){
     String historyDirName = testName;
-    YarnConfiguration conf = SliderUtils.createConfiguration()
     applicationId = new MockApplicationId(id: 1, clusterTimestamp: 0)
     applicationAttemptId = new MockApplicationAttemptId(
         applicationId: applicationId,


[06/12] incubator-slider git commit: SLIDER-967 Use nodemap to build up location restrictions on AA placement. This is the core of the AA placement algorithm: it finds nodes that are free to use.

Posted by st...@apache.org.
SLIDER-967 Use nodemap to build up location restrictions on AA placement.
This is the core of the AA placement algorithm: it finds nodes that are free to use.


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/7899f59a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/7899f59a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/7899f59a

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 7899f59a1cf12ae88775dcdd85a712f96cd6eb7c
Parents: 856ab84
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 20:11:50 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 20:11:50 2015 +0000

----------------------------------------------------------------------
 .../slider/api/proto/RestTypeMarshalling.java   |  10 +-
 .../slider/api/types/NodeInformation.java       |   4 +-
 .../server/appmaster/SliderAppMaster.java       |   6 +-
 .../server/appmaster/rpc/SliderIPCService.java  |   1 -
 .../slider/server/appmaster/state/AppState.java |  91 ++++++++-------
 .../server/appmaster/state/NodeEntry.java       |   2 +-
 .../server/appmaster/state/NodeInstance.java    |  11 +-
 .../slider/server/appmaster/state/NodeMap.java  |   4 +-
 .../appmaster/state/OutstandingRequest.java     |  51 ++++++++-
 .../state/OutstandingRequestTracker.java        |  55 +++++++++
 .../server/appmaster/state/RoleHistory.java     | 112 +++++++++++++++++--
 .../server/appmaster/state/RoleStatus.java      |  26 ++++-
 .../appstate/TestMockAppStateAAPlacement.groovy |  31 ++++-
 .../model/history/TestRoleHistoryAA.groovy      |  25 +++--
 ...stRoleHistoryFindNodesForNewInstances.groovy |  20 ++--
 ...tRoleHistoryOutstandingRequestTracker.groovy |  30 ++++-
 .../TestRoleHistoryRequestTracking.groovy       |  12 +-
 .../model/mock/BaseMockAppStateTest.groovy      |   4 +
 18 files changed, 397 insertions(+), 98 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java b/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java
index b7985e6..85a8358 100644
--- a/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java
+++ b/slider-core/src/main/java/org/apache/slider/api/proto/RestTypeMarshalling.java
@@ -38,6 +38,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
 
 /**
@@ -160,8 +162,8 @@ public class RestTypeMarshalling {
       builder.setLabels(info.labels);
     }
 
-    List<NodeEntryInformation> entries = info.entries;
-    if (entries != null) {
+    if (info.entries != null) {
+      Collection<NodeEntryInformation> entries = info.entries.values();
       for (NodeEntryInformation entry : entries) {
         Messages.NodeEntryInformationProto.Builder node =
             Messages.NodeEntryInformationProto.newBuilder();
@@ -192,7 +194,7 @@ public class RestTypeMarshalling {
     info.state = wire.getState();
     List<Messages.NodeEntryInformationProto> entriesList = wire.getEntriesList();
     if (entriesList != null) {
-      info.entries = new ArrayList<>(entriesList.size());
+      info.entries = new HashMap<>(entriesList.size());
       for (Messages.NodeEntryInformationProto entry : entriesList) {
         NodeEntryInformation nei = new NodeEntryInformation();
         nei.failed = entry.getFailed();
@@ -205,7 +207,7 @@ public class RestTypeMarshalling {
         nei.releasing = entry.getReleasing();
         nei.startFailed = entry.getStartFailed();
         nei.starting = entry.getStarting();
-        info.entries.add(nei);
+        info.entries.put(Integer.toString(nei.priority), nei);
       }
     }
     return info;

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java b/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java
index 049ee52..edf7e21 100644
--- a/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java
+++ b/slider-core/src/main/java/org/apache/slider/api/types/NodeInformation.java
@@ -22,7 +22,9 @@ import org.codehaus.jackson.annotate.JsonIgnoreProperties;
 import org.codehaus.jackson.map.annotate.JsonSerialize;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * Serialized node information. Must be kept in sync with the protobuf equivalent.
@@ -38,5 +40,5 @@ public class NodeInformation {
   public long lastUpdated;
   public String rackName;
   public String state;
-  public List<NodeEntryInformation> entries = new ArrayList<>();
+  public Map<String, NodeEntryInformation> entries = new HashMap<>();
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index d74688b..171451e 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -1826,7 +1826,6 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
    */
   private void releaseAllContainers() {
     List<AbstractRMOperation> operations = appState.releaseAllContainers();
-    providerRMOperationHandler.execute(operations);
     //now apply the operations
     execute(operations);
   }
@@ -1852,7 +1851,10 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
     LOG_YARN.info("onNodesUpdated({})", updatedNodes.size());
     log.info("Updated nodes {}", updatedNodes);
     // Check if any nodes are lost or revived and update state accordingly
-    appState.onNodesUpdated(updatedNodes);
+    List<AbstractRMOperation> operations = appState.onNodesUpdated(updatedNodes);
+    execute(operations);
+    // if there were any operations, trigger a review
+    reviewRequestAndReleaseNodes("nodes updated");
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
index bb8f512..a983f53 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/rpc/SliderIPCService.java
@@ -448,7 +448,6 @@ public class SliderIPCService extends AbstractService
     }
   }
 
-
   @Override
   public Messages.WrappedJsonProto getModelDesired(Messages.EmptyPayloadProto request) throws IOException {
     return lookupAggregateConf(MODEL_DESIRED);

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index 063a7fc..c960510 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -663,7 +663,6 @@ public class AppState {
     return newRole;
   }
 
-
   /**
    * Actions to perform when an instance definition is updated
    * Currently: 
@@ -678,9 +677,9 @@ public class AppState {
    *  
    * @throws BadConfigException
    */
-  private synchronized void onInstanceDefinitionUpdated() throws
-                                                          BadConfigException,
-                                                          IOException {
+  private synchronized void onInstanceDefinitionUpdated()
+      throws BadConfigException, IOException {
+
     log.debug("Instance definition updated");
     //note the time 
     snapshotTime = now();
@@ -1220,11 +1219,14 @@ public class AppState {
    * Anti-Affine: the {@link RoleStatus#outstandingAArequest} is set here.
    * This is where role history information will be used for placement decisions.
    * @param role role
-   * @return the container request to submit
+   * @return the container request to submit or null if there is none
    */
   private AMRMClient.ContainerRequest createContainerRequest(RoleStatus role) {
     incrementRequestCount(role);
     OutstandingRequest request = roleHistory.requestContainerForRole(role);
+    if (request == null) {
+      return null;
+    }
     if (role.isAntiAffinePlacement()) {
       role.setOutstandingAArequest(request);
     }
@@ -1426,12 +1428,13 @@ public class AppState {
    * Handle node update from the RM. This syncs up the node map with the RM's view
    * @param updatedNodes updated nodes
    */
-  public synchronized void onNodesUpdated(List<NodeReport> updatedNodes) {
+  public synchronized List<AbstractRMOperation> onNodesUpdated(List<NodeReport> updatedNodes) {
     boolean changed = roleHistory.onNodesUpdated(updatedNodes);
     if (changed) {
-      //TODO
       log.error("TODO: cancel AA requests and re-review");
+      return cancelOutstandingAARequests();
     }
+    return new ArrayList<>(0);
   }
 
   /**
@@ -1882,6 +1885,20 @@ public class AppState {
   }
 
   /**
+   * Cancel all outstanding anti-affine requests and reset the AA roles' state.
+   * @return a (usually empty) list of cancel/request operations.
+   */
+  public synchronized List<AbstractRMOperation> cancelOutstandingAARequests() {
+    List<AbstractRMOperation> operations = roleHistory.cancelOutstandingAARequests();
+    for (RoleStatus roleStatus : roleStatusMap.values()) {
+      if (roleStatus.isAntiAffinePlacement()) {
+        roleStatus.cancelOutstandingAARequest();
+      }
+    }
+    return operations;
+  }
+
+  /**
    * Look at the allocation status of one role, and trigger add/release
    * actions if the number of desired role instances doesn't equal 
    * (actual + pending).
@@ -1900,7 +1917,6 @@ public class AppState {
     long delta;
     long expected;
     String name = role.getName();
-    boolean isAA = role.isAntiAffinePlacement();
     synchronized (role) {
       delta = role.getDelta();
       expected = role.getDesired();
@@ -1920,19 +1936,24 @@ public class AppState {
 
     if (delta > 0) {
       // more workers needed than we have -ask for more
-      log.info("{}: Asking for {} more nodes(s) for a total of {} ", name,
-          delta, expected);
-
-      // TODO: AA RH to help here by only allowing one request for an AA
+      log.info("{}: Asking for {} more nodes(s) for a total of {} ", name, delta, expected);
 
-      if (isAA) {
-        // build one only if there is none outstanding
+      if (role.isAntiAffinePlacement()) {
+        // build one only if there is none outstanding, the role history knows
+        // enough about the cluster to ask, and there is somewhere to place
+        // the node
         if (role.getPendingAntiAffineRequests() == 0
+            && !role.isAARequestOutstanding()
             && roleHistory.canPlaceAANodes()) {
-          log.info("Starting an anti-affine request sequence for {} nodes", delta);
           // log the number outstanding
-          role.incPendingAntiAffineRequests(delta - 1);
-          addContainerRequest(operations, createContainerRequest(role));
+          AMRMClient.ContainerRequest request = createContainerRequest(role);
+          if (request != null) {
+            log.info("Starting an anti-affine request sequence for {} nodes", delta);
+            role.incPendingAntiAffineRequests(delta - 1);
+            addContainerRequest(operations, request);
+          } else {
+            log.info("No location for anti-affine request");
+          }
         } else {
           if (roleHistory.canPlaceAANodes()) {
             log.info("Adding {} more anti-affine requests", delta);
@@ -1955,22 +1976,7 @@ public class AppState {
       // reduce the number expected (i.e. subtract the delta)
       long excess = -delta;
 
-      if (isAA) {
-        // there may be pending requests which can be cancelled here
-        long pending = role.getPendingAntiAffineRequests();
-        if (excess <= pending) {
-          long outstanding = pending - excess;
-          log.info("Cancelling {} pending AA allocations, leaving {}", excess, outstanding);
-          role.setPendingAntiAffineRequests(outstanding);
-          excess = 0;
-        } else {
-          // not enough
-          log.info("Cancelling all pending AA allocations");
-          role.setPendingAntiAffineRequests(0);
-          excess -= pending;
-        }
-      }
-      // how many requests are outstanding?
+      // how many requests are outstanding? for AA roles, this includes pending
       long outstandingRequests = role.getRequested();
       if (outstandingRequests > 0) {
         // outstanding requests.
@@ -2052,15 +2058,22 @@ public class AppState {
     return operations;
   }
 
+  /**
+   * Add a container request if the request is non-null
+   * @param operations operations to add the entry to
+   * @param containerAsk what to ask for
+   */
   private void addContainerRequest(List<AbstractRMOperation> operations,
       AMRMClient.ContainerRequest containerAsk) {
-    log.info("Container ask is {} and label = {}", containerAsk,
-        containerAsk.getNodeLabelExpression());
-    int askMemory = containerAsk.getCapability().getMemory();
-    if (askMemory > this.containerMaxMemory) {
-      log.warn("Memory requested: {} > max of {}", askMemory, containerMaxMemory);
+    if (containerAsk != null) {
+      log.info("Container ask is {} and label = {}", containerAsk,
+          containerAsk.getNodeLabelExpression());
+      int askMemory = containerAsk.getCapability().getMemory();
+      if (askMemory > this.containerMaxMemory) {
+        log.warn("Memory requested: {} > max of {}", askMemory, containerMaxMemory);
+      }
+      operations.add(new ContainerRequestOperation(containerAsk));
     }
-    operations.add(new ContainerRequestOperation(containerAsk));
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
index 6dae3c6..c180f88 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java
@@ -90,7 +90,7 @@ public class NodeEntry {
    * the number of instances &gt; 1.
    */
   public synchronized boolean isAvailable() {
-    return getActive() == 0 && (requested == 0) && starting == 0;
+    return getActive() == 0 && requested == 0 && starting == 0;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
index b805ffb..ebd9d5a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
@@ -22,10 +22,12 @@ import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.slider.api.types.NodeInformation;
 import org.apache.slider.common.tools.Comparators;
+import org.apache.slider.common.tools.SliderUtils;
 
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.List;
 import java.util.ListIterator;
 import java.util.Set;
@@ -64,7 +66,8 @@ public class NodeInstance {
   private String nodeLabels = "";
 
   /**
-   * The list of node entries of specific roles
+   * An unordered list of node entries of specific roles. Nothing is
+   * indexed, so as to support sparser data structures.
    */
   private final List<NodeEntry> nodeEntries;
 
@@ -307,9 +310,9 @@ public class NodeInstance {
       info.rackName = nodeReport.getRackName();
       info.healthReport = nodeReport.getHealthReport();
     }
-    info.entries = new ArrayList<>(nodeEntries.size());
+    info.entries = new HashMap<>(nodeEntries.size());
     for (NodeEntry nodeEntry : nodeEntries) {
-      info.entries.add(nodeEntry.serialize());
+      info.entries.put(Integer.toString(nodeEntry.rolePriority), nodeEntry.serialize());
     }
     return info;
   }
@@ -323,7 +326,7 @@ public class NodeInstance {
    */
   public boolean canHost(int role, String label) {
     return isOnline()
-        && (label.isEmpty() || label.equals(nodeLabels))   // label match
+        && (SliderUtils.isUnset(label) || label.equals(nodeLabels))   // label match
         && (get(role) == null || get(role).isAvailable()); // no live role
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
index 2887c9e..aea48b3 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java
@@ -146,9 +146,9 @@ public class NodeMap extends HashMap<String, NodeInstance> {
    * Scan the current node map for all nodes capable of hosting an instance
    * @param role role ID
    * @param label label which must match, or "" for no label checks
-   * @return a list of node instances matching the criteria.
+   * @return a possibly empty list of node instances matching the criteria.
    */
-  public List<NodeInstance> findNodesForRole(int role, String label) {
+  public List<NodeInstance> findAllNodesForRole(int role, String label) {
     List<NodeInstance> nodes = new ArrayList<>(size());
     for (NodeInstance instance : values()) {
       if (instance.canHost(role, label)) {

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
index a9d4b52..e211e7f 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
@@ -29,6 +29,7 @@ import org.apache.slider.server.appmaster.operations.CancelSingleRequest;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.ArrayList;
 import java.util.List;
 
 /**
@@ -53,6 +54,13 @@ public final class OutstandingRequest extends RoleHostnamePair {
   public final NodeInstance node;
 
   /**
+   * A list of all possible nodes to list in an AA request. For a non-AA
+   * request where {@link #node} is set, element 0 of the list is the same
+   * value.
+   */
+  public final List<NodeInstance> nodes = new ArrayList<>(1);
+
+  /**
    * Optional label. This is cached as the request option (explicit-location + label) is forbidden,
    * yet the label needs to be retained for escalation.
    */
@@ -95,6 +103,12 @@ public final class OutstandingRequest extends RoleHostnamePair {
   private int priority = -1;
 
   /**
+   * Is this an Anti-affine request which should be cancelled on
+   * a cluster resize?
+   */
+  private boolean antiAffine = false;
+
+  /**
    * Create a request
    * @param roleId role
    * @param node node -can be null
@@ -103,6 +117,7 @@ public final class OutstandingRequest extends RoleHostnamePair {
                             NodeInstance node) {
     super(roleId, node != null ? node.hostname : null);
     this.node = node;
+    nodes.add(node);
   }
 
   /**
@@ -119,6 +134,19 @@ public final class OutstandingRequest extends RoleHostnamePair {
   }
 
   /**
+   * Create an Anti-affine request, including all listed nodes (there must be at least one)
+   * as targets.
+   * @param roleId role
+   * @param nodes list of nodes
+   */
+  public OutstandingRequest(int roleId, List<NodeInstance> nodes) {
+    super(roleId, nodes.get(0).hostname);
+    this.node = null;
+    this.antiAffine = true;
+    this.nodes.addAll(nodes);
+  }
+
+  /**
    * Is the request located in the cluster, that is: does it have a node.
    * @return true if a node instance was supplied in the constructor
    */
@@ -150,6 +178,14 @@ public final class OutstandingRequest extends RoleHostnamePair {
     return priority;
   }
 
+  public boolean isAntiAffine() {
+    return antiAffine;
+  }
+
+  public void setAntiAffine(boolean antiAffine) {
+    this.antiAffine = antiAffine;
+  }
+
   /**
    * Build a container request.
    * <p>
@@ -183,7 +219,19 @@ public final class OutstandingRequest extends RoleHostnamePair {
     NodeInstance target = this.node;
     String nodeLabels;
 
-    if (target != null) {
+    if (isAntiAffine()) {
+      hosts = new String[nodes.size()];
+      int c = 0;
+      for (NodeInstance nodeInstance : nodes) {
+        hosts[c] = nodeInstance.hostname;
+        c++;
+      }
+      log.info("Creating anti-affine request across {} nodes; first node = {}", c, hostname);
+      escalated = false;
+      mayEscalate = false;
+      relaxLocality = false;
+      nodeLabels = label;
+    } else if (target != null) {
       // placed request. Hostname is used in request
       hosts = new String[1];
       hosts[0] = target.hostname;
@@ -215,7 +263,6 @@ public final class OutstandingRequest extends RoleHostnamePair {
                                       pri,
                                       relaxLocality,
                                       nodeLabels);
-
     validate();
     return issuedRequest;
   }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
index ecdc07a..4209449 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
@@ -93,6 +93,23 @@ public class OutstandingRequestTracker {
   }
 
   /**
+   * Create a new Anti-affine request for the specific role
+   * <p>
+   * It is added to {@link #openRequests}
+   * <p>
+   * This does not update the node instance's role's request count
+   * @param role role index
+   * @param nodes list of suitable nodes
+   * @return a new request
+   */
+  public synchronized OutstandingRequest newAARequest(int role, List<NodeInstance> nodes) {
+    Preconditions.checkArgument(!nodes.isEmpty());
+    OutstandingRequest request = new OutstandingRequest(role, nodes);
+    openRequests.add(request);
+    return request;
+  }
+
+  /**
    * Look up any oustanding request to a (role, hostname). 
    * @param role role index
    * @param hostname hostname
@@ -364,6 +381,43 @@ public class OutstandingRequestTracker {
   }
 
   /**
+   * Cancel all outstanding AA requests from the lists of requests.
+   *
+   * This does not remove them from the role status; they must be reset
+   * by the caller.
+   *
+   */
+  @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
+  public synchronized List<AbstractRMOperation> cancelOutstandingAARequests() {
+
+    List<AbstractRMOperation> operations = new ArrayList<>();
+
+    // first, all placed requests
+    for (Map.Entry<RoleHostnamePair, OutstandingRequest> entry : placedRequests.entrySet()) {
+      OutstandingRequest outstandingRequest = entry.getValue();
+      synchronized (outstandingRequest) {
+        if (outstandingRequest.isAntiAffine()) {
+          // anti-affine request: cancel it
+          operations.add(outstandingRequest.createCancelOperation());
+          placedRequests.remove(entry.getKey());
+        }
+      }
+    }
+    // second, all open requests
+    for (OutstandingRequest outstandingRequest : openRequests) {
+      synchronized (outstandingRequest) {
+        if (outstandingRequest.isAntiAffine()) {
+          // anti-affine request: cancel it
+          operations.add(outstandingRequest.createCancelOperation());
+          openRequests.remove(outstandingRequest);
+        }
+      }
+    }
+
+    return operations;
+  }
+
+  /**
    * Extract a specific number of open requests for a role
    * @param roleId role Id
    * @param count count to extract
@@ -382,6 +436,7 @@ public class OutstandingRequestTracker {
     }
     return results;
   }
+
   /**
    * Extract a specific number of placed requests for a role
    * @param roleId role Id

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index df1f4e1..2ca5367 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -19,13 +19,13 @@
 package org.apache.slider.server.appmaster.state;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.client.api.AMRMClient;
 import org.apache.slider.api.types.NodeInformation;
 import org.apache.slider.common.tools.SliderUtils;
 import org.apache.slider.core.exceptions.BadConfigException;
@@ -46,11 +46,9 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 
 /**
@@ -549,7 +547,7 @@ public class RoleHistory {
    * @return the instance, or null for none
    */
   @VisibleForTesting
-  public synchronized NodeInstance findNodeForNewInstance(RoleStatus role) {
+  public synchronized NodeInstance findRecentNodeForNewInstance(RoleStatus role) {
     if (!role.isPlacementDesired()) {
       // no data locality policy
       return null;
@@ -591,6 +589,18 @@ public class RoleHistory {
   }
 
   /**
+   * Find all the nodes which could host a new anti-affine instance of a role.
+   * @param role role
+   * @return the list of candidate nodes; callers treat an empty list as "none"
+   */
+  @VisibleForTesting
+  public synchronized List<NodeInstance> findNodeForNewAAInstance(RoleStatus role) {
+    // all nodes that are live and can host the role; no attempt to exclude ones
+    // considered failing
+    return nodemap.findAllNodesForRole(role.getKey(), role.getLabelExpression());
+  }
+
+  /**
    * Request an instance on a given node.
    * An outstanding request is created & tracked, with the 
    * relevant node entry for that role updated.
@@ -615,15 +625,29 @@ public class RoleHistory {
    * Find a node for a role and request an instance on that (or a location-less
    * instance)
    * @param role role status
-   * @return a request ready to go
+   * @return a request ready to go, or null if this is an AA request and no
+   * location can be found.
    */
   public synchronized OutstandingRequest requestContainerForRole(RoleStatus role) {
 
     Resource resource = recordFactory.newResource();
     role.copyResourceRequirements(resource);
-    NodeInstance node = findNodeForNewInstance(role);
-    // TODO AA -what if there are no suitable nodes?
-    return requestInstanceOnNode(node, role, resource);
+    if (role.isAntiAffinePlacement()) {
+      // if a placement can be found, return it.
+      List<NodeInstance> nodes = findNodeForNewAAInstance(role);
+      if (!nodes.isEmpty()) {
+        OutstandingRequest outstanding
+            = outstandingRequests.newAARequest(role.getKey(), nodes);
+        outstanding.buildContainerRequest(resource, role, now());
+        return outstanding;
+      } else {
+        log.warn("No suitable location for {}", role.getName());
+        return null;
+      }
+    } else {
+      NodeInstance node = findRecentNodeForNewInstance(role);
+      return requestInstanceOnNode(node, role, resource);
+    }
   }
 
   /**
@@ -972,6 +996,26 @@ public class RoleHistory {
   public List<AbstractRMOperation> escalateOutstandingRequests() {
     return outstandingRequests.escalateOutstandingRequests(now());
   }
+  /**
+   * Cancel all outstanding AA requests held by the outstanding request tracker.
+   * @return a (possibly empty) list of cancel operations.
+   */
+  public List<AbstractRMOperation> cancelOutstandingAARequests() {
+    return outstandingRequests.cancelOutstandingAARequests();
+  }
+
+  /**
+   * Cancel requests for new containers of a role; actual containers are untouched.
+   * @param role role
+   * @param toCancel number to cancel
+   * @return a list of cancellable operations.
+   */
+  public List<AbstractRMOperation> cancelRequestsForRole(RoleStatus role, int toCancel) {
+    if (role.isAntiAffinePlacement()) {
+      return cancelRequestsForAARole(role, toCancel);
+    }
+    return cancelRequestsForSimpleRole(role, toCancel);
+  }
 
   /**
    * Build the list of requests to cancel from the outstanding list.
@@ -979,19 +1023,67 @@ public class RoleHistory {
    * @param toCancel number to cancel
    * @return a list of cancellable operations.
    */
-  public synchronized List<AbstractRMOperation> cancelRequestsForRole(RoleStatus role, int toCancel) {
+  private synchronized List<AbstractRMOperation> cancelRequestsForSimpleRole(RoleStatus role, int toCancel) {
+    Preconditions.checkArgument(toCancel > 0,
+        "trying to cancel invalid number of requests: " + toCancel);
     List<AbstractRMOperation> results = new ArrayList<>(toCancel);
     // first scan through the unplaced request list to find all of a role
     int roleId = role.getKey();
     List<OutstandingRequest> requests =
         outstandingRequests.extractOpenRequestsForRole(roleId, toCancel);
 
+    if (role.isAntiAffinePlacement()) {
+      // TODO: AA
+      // AA placement, so clear the role info
+      role.cancelOutstandingAARequest();
+    }
     // are there any left?
     int remaining = toCancel - requests.size();
     // ask for some placed nodes
     requests.addAll(outstandingRequests.extractPlacedRequestsForRole(roleId, remaining));
 
-    // TODO AA: clear anything here?
+    // build cancellations
+    for (OutstandingRequest request : requests) {
+      results.add(request.createCancelOperation());
+    }
+    return results;
+  }
+
+  /**
+   * Build the list of requests to cancel for an AA role. This reduces the number
+   * of outstanding pending requests first, then cancels any active request,
+   * before finally asking for any placed containers
+   * @param role role
+   * @param toCancel number to cancel
+   * @return a list of cancellable operations.
+   */
+  private synchronized List<AbstractRMOperation> cancelRequestsForAARole(RoleStatus role, int toCancel) {
+    List<AbstractRMOperation> results = new ArrayList<>(toCancel);
+    int roleId = role.getKey();
+    List<OutstandingRequest> requests = new ArrayList<>(toCancel);
+    // there may be pending requests which can be cancelled here
+    long pending = role.getPendingAntiAffineRequests();
+    if (pending > 0) {
+      // there are some pending ones which can be cancelled first
+      long pendingToCancel = Math.min(pending, toCancel);
+      log.info("Cancelling {} pending AA allocations, leaving {}", toCancel,
+          pendingToCancel);
+      role.setPendingAntiAffineRequests(pending - pendingToCancel);
+      toCancel -= pendingToCancel;
+    }
+    if (toCancel > 0 && role.isAARequestOutstanding()) {
+      // not enough
+      log.info("Cancelling current AA request");
+      // find the single entry which may be running
+      requests = outstandingRequests.extractOpenRequestsForRole(roleId, toCancel);
+      role.cancelOutstandingAARequest();
+      toCancel--;
+    }
+
+    // ask for some excess nodes
+    if (toCancel > 0) {
+      requests.addAll(outstandingRequests.extractPlacedRequestsForRole(roleId, toCancel));
+    }
 
     // build cancellations
     for (OutstandingRequest request : requests) {

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
index 1beaddc..a14a84b 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleStatus.java
@@ -71,7 +71,7 @@ public final class RoleStatus implements Cloneable {
   private final LongGauge pendingAntiAffineRequests = new LongGauge(0);
 
   /** any pending AA request */
-  private OutstandingRequest outstandingAArequest = null;
+  private volatile OutstandingRequest outstandingAArequest = null;
 
   private String failureMessage = "";
 
@@ -155,8 +155,12 @@ public final class RoleStatus implements Cloneable {
     return actual.decToFloor(1);
   }
 
+  /**
+   * Get the request count. For AA roles, this includes pending ones.
+   * @return a count of requested containers
+   */
   public long getRequested() {
-    return requested.get();
+    return requested.get() + pendingAntiAffineRequests.get();
   }
 
   public long incRequested() {
@@ -209,6 +213,14 @@ public final class RoleStatus implements Cloneable {
   }
 
   /**
+   * Probe for an anti-affine (AA) request currently being outstanding.
+   * @return true if there is an outstanding AA Request
+   */
+  public boolean isAARequestOutstanding() {
+    return outstandingAArequest != null;
+  }
+
+  /**
    * Note that a role failed, text will
    * be used in any diagnostics if an exception
    * is later raised.
@@ -312,13 +324,21 @@ public final class RoleStatus implements Cloneable {
   /**
    * Complete the outstanding AA request (there's no check for one in progress, caller
    * expected to have done that).
-   * @return the number of outstanding requests
    */
   public void completeOutstandingAARequest() {
     setOutstandingAArequest(null);
   }
 
   /**
+   * Cancel any outstanding AA request and reset the pending AA request count
+   * to zero. Harmless if the role is non-AA, or nothing is outstanding.
+   */
+  public void cancelOutstandingAARequest() {
+    setOutstandingAArequest(null);
+    setPendingAntiAffineRequests(0);
+  }
+
+  /**
    * Get the number of roles we are short of.
    * nodes released are ignored.
    * @return the positive or negative number of roles to add/release.

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
index baf88dc..c7f59e3 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
@@ -93,8 +93,8 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
 
     List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
     AMRMClient.ContainerRequest request = getSingleRequest(ops)
-    assert request.relaxLocality
-    assert request.nodes == null
+    assert !request.relaxLocality
+    assert request.nodes.size() == engine.cluster.clusterSize
     assert request.racks == null
     assert request.capability
 
@@ -131,6 +131,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     assert appState.onNodeManagerContainerStarted(container.id)
     ops = appState.reviewRequestAndReleaseNodes()
     assert ops.size() == 0
+    assertAllContainersAA();
   }
 
   @Test
@@ -160,6 +161,8 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     assert 1  == submitOperations(ops2, [], ops3).size()
     assert 2 == ops3.size()
     assert aaRole.pendingAntiAffineRequests == 0
+    assertAllContainersAA()
+
   }
 
   @Test
@@ -169,14 +172,17 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
     getSingleRequest(ops)
     assert aaRole.pendingAntiAffineRequests == 1
+    assert aaRole.AARequestOutstanding
 
     // flex down so that the next request should be cancelled
     aaRole.desired = 1
 
-    // expect: no new reqests, pending count --
+    // expect: no new requests, pending count --
     List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
     assert ops2.empty
+    assert aaRole.AARequestOutstanding
     assert aaRole.pendingAntiAffineRequests == 0
+    assertAllContainersAA()
 
     // next iter
     submitOperations(ops, [], ops2).size()
@@ -195,12 +201,14 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     List<AbstractRMOperation> ops = appState.reviewRequestAndReleaseNodes()
     getSingleRequest(ops)
     assert aaRole.pendingAntiAffineRequests == 0
+    assert aaRole.AARequestOutstanding
 
     // flex down so that the next request should be cancelled
     aaRole.desired = 0
     // expect: no new reqests, pending count --
     List<AbstractRMOperation> ops2 = appState.reviewRequestAndReleaseNodes()
     assert aaRole.pendingAntiAffineRequests == 0
+    assert !aaRole.AARequestOutstanding
     assert ops2.size() == 1
     getSingleCancel(ops2)
 
@@ -209,5 +217,22 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     assert 1 == ops2.size()
   }
 
+  void assertAllContainersAA() {
+    assertAllContainersAA(Integer.toString(aaRole.key))
+  }
+
+  /**
+   * Assert that no node hosts more than one live, starting or releasing instance
+   * @param index role index, used as the key into each node's entries
+   */
+  void assertAllContainersAA(String index) {
+    def nodemap = stateAccess.nodeInformationSnapshot
+    nodemap.each { name, info ->
+      def nodeEntry = info.entries[index]
+      assert nodeEntry == null ||
+             (nodeEntry.live + nodeEntry.starting + nodeEntry.releasing)  <= 1 ,
+      "too many instances on node $name"
+    }
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
index f99326f..fdbc3b4 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryAA.groovy
@@ -21,10 +21,15 @@ package org.apache.slider.server.appmaster.model.history
 import groovy.util.logging.Slf4j
 import org.apache.hadoop.yarn.api.records.NodeReport
 import org.apache.hadoop.yarn.api.records.NodeState
+import org.apache.hadoop.yarn.client.api.AMRMClient
+import org.apache.slider.server.appmaster.model.mock.MockFactory
 import org.apache.slider.server.appmaster.model.mock.MockNodeReport
+import org.apache.slider.server.appmaster.model.mock.MockRoleHistory
 import org.apache.slider.server.appmaster.state.NodeEntry
 import org.apache.slider.server.appmaster.state.NodeInstance
 import org.apache.slider.server.appmaster.state.NodeMap
+import org.apache.slider.server.appmaster.state.RoleHistory
+import org.apache.slider.server.appmaster.state.RoleStatus
 import org.apache.slider.test.SliderTestBase
 import org.junit.Test
 
@@ -37,44 +42,48 @@ class TestRoleHistoryAA extends SliderTestBase {
 
   List<String> hostnames = ["one", "two", "three"]
   NodeMap nodeMap, gpuNodeMap
+  RoleHistory roleHistory = new MockRoleHistory(MockFactory.ROLES)
+
+  AMRMClient.ContainerRequest requestContainer(RoleStatus roleStatus) {
+    roleHistory.requestContainerForRole(roleStatus).issuedRequest
+  }
 
   @Override
   void setup() {
     super.setup()
     nodeMap = createNodeMap(hostnames, NodeState.RUNNING)
     gpuNodeMap = createNodeMap(hostnames, NodeState.RUNNING, "GPU")
-
   }
 
   @Test
   public void testFindNodesInFullCluster() throws Throwable {
     // all three will surface at first
-    assertResultSize(3, nodeMap.findNodesForRole(1, ""))
+    assertResultSize(3, nodeMap.findAllNodesForRole(1, ""))
   }
 
   @Test
   public void testFindNodesInUnhealthyCluster() throws Throwable {
     // all three will surface at first
     nodeMap.get("one").updateNode(new MockNodeReport("one",NodeState.UNHEALTHY))
-    assertResultSize(2, nodeMap.findNodesForRole(1, ""))
+    assertResultSize(2, nodeMap.findAllNodesForRole(1, ""))
   }
 
   @Test
   public void testFindNoNodesWrongLabel() throws Throwable {
     // all three will surface at first
-    assertResultSize(0, nodeMap.findNodesForRole(1, "GPU"))
+    assertResultSize(0, nodeMap.findAllNodesForRole(1, "GPU"))
   }
 
   @Test
   public void testFindNoNodesRightLabel() throws Throwable {
     // all three will surface at first
-    assertResultSize(3, gpuNodeMap.findNodesForRole(1, "GPU"))
+    assertResultSize(3, gpuNodeMap.findAllNodesForRole(1, "GPU"))
   }
 
   @Test
   public void testFindNoNodesNoLabel() throws Throwable {
     // all three will surface at first
-    assertResultSize(3, gpuNodeMap.findNodesForRole(1, ""))
+    assertResultSize(3, gpuNodeMap.findAllNodesForRole(1, ""))
   }
 
   @Test
@@ -83,7 +92,7 @@ class TestRoleHistoryAA extends SliderTestBase {
     applyToNodeEntries(nodeMap) {
       NodeEntry it -> it.request()
     }
-    assertResultSize(0, nodeMap.findNodesForRole(1, ""))
+    assertResultSize(0, nodeMap.findAllNodesForRole(1, ""))
   }
 
   @Test
@@ -92,7 +101,7 @@ class TestRoleHistoryAA extends SliderTestBase {
     applyToNodeEntries(nodeMap) {
       NodeEntry it -> it.request()
     }
-    assertResultSize(0, nodeMap.findNodesForRole(1, ""))
+    assertResultSize(0, nodeMap.findAllNodesForRole(1, ""))
   }
 
   def assertResultSize(int size, List<NodeInstance> list) {

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
index 63aa6d2..f36724e 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryFindNodesForNewInstances.groovy
@@ -33,10 +33,8 @@ import org.junit.Test
 
 /**
  * Testing finding nodes for new instances.
- * These tests validate the (currently) suboptimal
- * behavior of not listing any known nodes when there
- * are none in the available list -even if there are nodes
- * known to be not running live instances in the cluster.
+ *
+ * This stresses the non-AA codepath
  */
 @Slf4j
 @CompileStatic
@@ -71,7 +69,7 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
   public List<NodeInstance> findNodes(int count, RoleStatus roleStatus = roleStat) {
     List < NodeInstance > found = [];
     for (int i = 0; i < count; i++) {
-      NodeInstance f = roleHistory.findNodeForNewInstance(roleStatus)
+      NodeInstance f = roleHistory.findRecentNodeForNewInstance(roleStatus)
       if (f) {
         found << f
       };
@@ -81,17 +79,17 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
 
   @Test
   public void testFind1NodeR0() throws Throwable {
-    NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+    NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
     log.info("found: $found")
     assert [age3Active0].contains(found)
   }
 
   @Test
   public void testFind2NodeR0() throws Throwable {
-    NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+    NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
     log.info("found: $found")
     assert [age2Active0, age3Active0].contains(found)
-    NodeInstance found2 = roleHistory.findNodeForNewInstance(roleStat)
+    NodeInstance found2 = roleHistory.findRecentNodeForNewInstance(roleStat)
     log.info("found: $found2")
     assert [age2Active0, age3Active0].contains(found2)
     assert found != found2;
@@ -100,7 +98,7 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
   @Test
   public void testFind3NodeR0ReturnsNull() throws Throwable {
     assert 2== findNodes(2).size()
-    NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+    NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
     assert found == null;
   }
 
@@ -124,7 +122,7 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
     assert age2Active0.getActiveRoleInstances(0) != 0
     age3Active0.get(0).onStartCompleted()
     assert age3Active0.getActiveRoleInstances(0) != 0
-    NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+    NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
     log.info(found ?.toFullString())
     assert found == null
   }
@@ -148,7 +146,7 @@ class TestRoleHistoryFindNodesForNewInstances extends BaseMockAppStateTest {
     assert age2Active0.exceedsFailureThreshold(roleStat)
 
     // get the role & expect age3 to be picked up, even though it is older
-    NodeInstance found = roleHistory.findNodeForNewInstance(roleStat)
+    NodeInstance found = roleHistory.findRecentNodeForNewInstance(roleStat)
     assert age3Active0.is(found)
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
index 1c99c04..7b389cd 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryOutstandingRequestTracker.groovy
@@ -240,6 +240,7 @@ class TestRoleHistoryOutstandingRequestTracker extends BaseMockAppStateTest  {
     def yarnRequest = req1.buildContainerRequest(resource, workerRole, 0)
     assert (yarnRequest.nodeLabelExpression == null)
     assert (!yarnRequest.relaxLocality)
+    // escalation
     def yarnRequest2 = req1.escalate()
     assert (yarnRequest2.nodeLabelExpression == WORKERS_LABEL)
     assert (yarnRequest2.relaxLocality)
@@ -258,7 +259,6 @@ class TestRoleHistoryOutstandingRequestTracker extends BaseMockAppStateTest  {
     resource.virtualCores = 1
     resource.memory = 48;
 
-    def label = null
     // initial request
     def yarnRequest = req1.buildContainerRequest(resource, role0Status, 0)
     assert yarnRequest.nodes != null
@@ -269,6 +269,34 @@ class TestRoleHistoryOutstandingRequestTracker extends BaseMockAppStateTest  {
     assert yarnRequest2.relaxLocality
   }
 
+  @Test(expected = IllegalArgumentException)
+  public void testAARequestNoNodes() throws Throwable {
+    tracker.newAARequest(role0Status.key, [])
+  }
+
+  @Test
+  public void testAARequest() throws Throwable {
+    def role0 = role0Status.key
+    OutstandingRequest request = tracker.newAARequest(role0, [host1])
+    assert host1.hostname == request.hostname
+    assert !request.located
+  }
+
+  @Test
+  public void testAARequestPair() throws Throwable {
+    def role0 = role0Status.key
+    OutstandingRequest request = tracker.newAARequest(role0, [host1, host2])
+    assert host1.hostname == request.hostname
+    assert !request.located
+    def yarnRequest = request.buildContainerRequest(
+        role0Status.copyResourceRequirements(new MockResource(0, 0)),
+        role0Status,
+        0)
+    assert !yarnRequest.relaxLocality
+    assert !request.mayEscalate()
+
+    assert yarnRequest.nodes.size() == 2
+  }
 
   /**
    * Create a new request (always against host1)

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
index 14ac32a..2f160cb 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
@@ -87,7 +87,7 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
 
   @Test
   public void testRequestedNodeOffList() throws Throwable {
-    NodeInstance ni = roleHistory.findNodeForNewInstance(roleStatus)
+    NodeInstance ni = roleHistory.findRecentNodeForNewInstance(roleStatus)
     assert age3Active0 == ni
     assertListEquals([age2Active0], roleHistory.cloneRecentNodeList(0))
     roleHistory.requestInstanceOnNode(ni,
@@ -106,7 +106,7 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
     recordAsFailed(age2Active0, 0, 4)
     assert age2Active0.isConsideredUnreliable(0, roleStatus.nodeFailureThreshold)
     // expect to get a null node back
-    NodeInstance ni = roleHistory.findNodeForNewInstance(roleStatus)
+    NodeInstance ni = roleHistory.findRecentNodeForNewInstance(roleStatus)
     assert !ni
 
     // which is translated to a no-location request
@@ -123,7 +123,7 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
     assert !age3Active0.isConsideredUnreliable(0, roleStatus.nodeFailureThreshold)
     assert !roleHistory.cloneRecentNodeList(0).empty
     // looking for a node should now find one
-    ni = roleHistory.findNodeForNewInstance(roleStatus)
+    ni = roleHistory.findRecentNodeForNewInstance(roleStatus)
     assert ni == age3Active0
     req = roleHistory.requestInstanceOnNode(ni, roleStatus, resource).issuedRequest
     assert 1 == req.nodes.size()
@@ -153,13 +153,13 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
     assert recentRole0.indexOf(age3Active0) < recentRole0.indexOf(age2Active0)
 
     // the non-strict role has no suitable nodes
-    assert null == roleHistory.findNodeForNewInstance(role0Status)
+    assert null == roleHistory.findRecentNodeForNewInstance(role0Status)
 
 
-    def ni = roleHistory.findNodeForNewInstance(targetRole)
+    def ni = roleHistory.findRecentNodeForNewInstance(targetRole)
     assert ni
 
-    def ni2 = roleHistory.findNodeForNewInstance(targetRole)
+    def ni2 = roleHistory.findRecentNodeForNewInstance(targetRole)
     assert ni2
     assert ni != ni2
   }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/7899f59a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
index 14e556a..e1660ee 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
@@ -42,8 +42,10 @@ import org.apache.slider.server.appmaster.state.ContainerAssignment
 import org.apache.slider.server.appmaster.state.ContainerOutcome
 import org.apache.slider.server.appmaster.state.NodeEntry
 import org.apache.slider.server.appmaster.state.NodeInstance
+import org.apache.slider.server.appmaster.state.ProviderAppState
 import org.apache.slider.server.appmaster.state.RoleInstance
 import org.apache.slider.server.appmaster.state.RoleStatus
+import org.apache.slider.server.appmaster.state.StateAccessForProviders
 import org.apache.slider.test.SliderTestBase
 import org.junit.Before
 
@@ -59,6 +61,7 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
   protected Path historyPath;
   protected MockApplicationId applicationId;
   protected MockApplicationAttemptId applicationAttemptId;
+  protected StateAccessForProviders stateAccess
 
   /**
    * Override point: called in setup() to create the YARN engine; can
@@ -97,6 +100,7 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
     fs.delete(historyPath, true)
     appState = new MockAppState()
     appState.buildInstance(buildBindingInfo())
+    stateAccess = new ProviderAppState(testName, appState)
   }
 
   /**


[05/12] incubator-slider git commit: Merge branch 'develop' into feature/SLIDER-82-pass-3.1

Posted by st...@apache.org.
Merge branch 'develop' into feature/SLIDER-82-pass-3.1


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/856ab847
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/856ab847
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/856ab847

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 856ab847ae1978c3c2a1ac600ed43c5465809e59
Parents: 2606192 c4e7329
Author: Steve Loughran <st...@apache.org>
Authored: Wed Nov 11 13:58:03 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Wed Nov 11 13:58:03 2015 +0000

----------------------------------------------------------------------
 pom.xml                                                   |  7 +++++++
 slider-core/pom.xml                                       |  1 +
 .../main/java/org/apache/slider/client/SliderClient.java  | 10 +++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/856ab847/slider-core/src/main/java/org/apache/slider/client/SliderClient.java
----------------------------------------------------------------------


[11/12] incubator-slider git commit: Merge branch 'develop' into feature/SLIDER-82-pass-3.1

Posted by st...@apache.org.
Merge branch 'develop' into feature/SLIDER-82-pass-3.1


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/6b13042c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/6b13042c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/6b13042c

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 6b13042c66c0f8d0914ee18f9abddd170f6ef512
Parents: 4754088 b201de8
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 14:02:33 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 14:02:33 2015 +0000

----------------------------------------------------------------------
 pom.xml                                             |  8 ++++++--
 .../slider/server/appmaster/SliderAppMaster.java    |  2 +-
 .../server/appmaster/monkey/ChaosKillContainer.java | 16 +++++++---------
 3 files changed, 14 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6b13042c/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------


[10/12] incubator-slider git commit: SLIDER-981 Build and test slider against Hadoop 2.7.2 RC0 -allow for staging URL to be defined on CLI

Posted by st...@apache.org.
SLIDER-981 Build and test slider against Hadoop 2.7.2 RC0 -allow for staging URL to be defined on CLI


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/b201de8a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/b201de8a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/b201de8a

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: b201de8ade387dbd5550b35060ca001d2af4362b
Parents: ca6f9ce
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 13:59:36 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 14:01:41 2015 +0000

----------------------------------------------------------------------
 pom.xml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/b201de8a/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 866c448..289993c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -234,17 +234,21 @@
     <github.site.plugin.version>0.8</github.site.plugin.version>
     <maven-site-plugin.skipDeploy>true</maven-site-plugin.skipDeploy>
     <github.downloads.plugin.version>0.6</github.downloads.plugin.version>
+    
+    <!-- override point for ASF staging/snapshot repos -->
+    <asf.staging>https://repository.apache.org/content/groups/staging/</asf.staging>
+    <asf.snapshots>https://repository.apache.org/content/repositories/snapshots/</asf.snapshots>
   </properties>
 
 
   <repositories>
     <repository>
       <id>ASF Staging</id>
-      <url>https://repository.apache.org/content/groups/staging/</url>
+      <url>${asf.staging}</url>
     </repository>
     <repository>
       <id>ASF Snapshots</id>
-      <url>https://repository.apache.org/content/repositories/snapshots/</url>
+      <url>${asf.snapshots}</url>
       <snapshots>
         <enabled>true</enabled>
       </snapshots>


[08/12] incubator-slider git commit: SLIDER-967 First AA placement tests all working

Posted by st...@apache.org.
SLIDER-967 First AA placement tests all working


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/47540882
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/47540882
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/47540882

Branch: refs/heads/feature/SLIDER-82-pass-3.1
Commit: 47540882e15af473281eee477ea3253d5bef5e58
Parents: a7ba72e
Author: Steve Loughran <st...@apache.org>
Authored: Thu Nov 12 13:43:21 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Nov 12 13:43:21 2015 +0000

----------------------------------------------------------------------
 .../slider/server/appmaster/state/AppState.java | 22 ++++++++++----------
 .../server/appmaster/state/RoleHistory.java     |  2 --
 .../appstate/TestMockAppStateAAPlacement.groovy | 11 ++++++----
 3 files changed, 18 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/47540882/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index c960510..0c66e25 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -2187,17 +2187,6 @@ public class AppState {
       // add all requests to the operations list
       operations.addAll(allocation.operations);
 
-      // now for AA requests, add some more
-      if (role.isAntiAffinePlacement())  {
-        role.completeOutstandingAARequest();
-        if (role.getPendingAntiAffineRequests() > 0) {
-          // still an outstanding AA request: need to issue a new one.
-          log.info("Asking for next container for AA role {}", roleName);
-          role.decPendingAntiAffineRequests();
-          addContainerRequest(operations, createContainerRequest(role));
-        }
-      }
-
       //look for condition where we get more back than we asked
       if (allocated > desired) {
         log.info("Discarding surplus {} container {} on {}", roleName,  cid,
@@ -2228,6 +2217,17 @@ public class AppState {
         if (request != null) {
           operations.add(request);
         }
+        // now for AA requests, add some more
+        if (role.isAntiAffinePlacement()) {
+          role.completeOutstandingAARequest();
+          if (role.getPendingAntiAffineRequests() > 0) {
+            // still an outstanding AA request: need to issue a new one.
+            log.info("Asking for next container for AA role {}", roleName);
+            role.decPendingAntiAffineRequests();
+            addContainerRequest(operations, createContainerRequest(role));
+          }
+        }
+
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/47540882/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index 8a840fc..d7e6050 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -749,8 +749,6 @@ public class RoleHistory {
         sortRecentNodeList(role);
       }
     }
-    // TODO: AA placement: now request a new node
-
     return outcome;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/47540882/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
index 64c0362..c98f3bf 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAAPlacement.groovy
@@ -30,7 +30,7 @@ import org.apache.slider.server.appmaster.model.mock.MockRoles
 import org.apache.slider.server.appmaster.operations.AbstractRMOperation
 import org.apache.slider.server.appmaster.state.AppStateBindingInfo
 import org.apache.slider.server.appmaster.state.ContainerAssignment
-import org.apache.slider.server.appmaster.state.NodeInstance
+import org.apache.slider.server.appmaster.state.NodeMap
 import org.apache.slider.server.appmaster.state.RoleInstance
 import org.apache.slider.server.appmaster.state.RoleStatus
 import org.junit.Test
@@ -85,8 +85,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
 
   @Test
   public void testAllocateAANoLabel() throws Throwable {
-    def nodemap = appState.roleHistory.cloneNodemap()
-    assert nodemap.size() > 0
+    assert cloneNodemap().size() > 0
 
 
     // want multiple instances, so there will be iterations
@@ -107,7 +106,7 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     appState.onContainersAllocated([allocated], assignments, operations)
 
     def host = allocated.nodeId.host
-    def hostInstance = nodemap.get(host)
+    def hostInstance = cloneNodemap().get(host)
     assert hostInstance.get(aaRole.key).starting == 1
     assert !hostInstance.canHost(aaRole.key, "")
     assert !hostInstance.canHost(aaRole.key, null)
@@ -146,6 +145,10 @@ class TestMockAppStateAAPlacement extends BaseMockAppStateTest
     assertAllContainersAA();
   }
 
+  protected NodeMap cloneNodemap() {
+    appState.roleHistory.cloneNodemap()
+  }
+
   @Test
   public void testAllocateFlexUp() throws Throwable {
     // want multiple instances, so there will be iterations