You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by st...@apache.org on 2015/11/01 12:49:17 UTC

[05/14] incubator-slider git commit: SLIDER-947 build node map from yarn update reports; serve via REST/IPC. API done; now trying to make sure RM notifies AM of state, which is being checked via new metrics

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index 0f77824..eadb1dc 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -85,23 +85,9 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import static org.apache.slider.api.ResourceKeys.DEF_YARN_CORES;
-import static org.apache.slider.api.ResourceKeys.DEF_YARN_LABEL_EXPRESSION;
-import static org.apache.slider.api.ResourceKeys.DEF_YARN_MEMORY;
-import static org.apache.slider.api.ResourceKeys.YARN_CORES;
-import static org.apache.slider.api.ResourceKeys.YARN_LABEL_EXPRESSION;
-import static org.apache.slider.api.ResourceKeys.YARN_MEMORY;
-import static org.apache.slider.api.RoleKeys.ROLE_FAILED_INSTANCES;
-import static org.apache.slider.api.RoleKeys.ROLE_FAILED_RECENTLY_INSTANCES;
-import static org.apache.slider.api.RoleKeys.ROLE_FAILED_STARTING_INSTANCES;
-import static org.apache.slider.api.RoleKeys.ROLE_NODE_FAILED_INSTANCES;
-import static org.apache.slider.api.RoleKeys.ROLE_PREEMPTED_INSTANCES;
-import static org.apache.slider.api.RoleKeys.ROLE_RELEASING_INSTANCES;
-import static org.apache.slider.api.RoleKeys.ROLE_REQUESTED_INSTANCES;
-import static org.apache.slider.api.StateValues.STATE_CREATED;
-import static org.apache.slider.api.StateValues.STATE_DESTROYED;
-import static org.apache.slider.api.StateValues.STATE_LIVE;
-import static org.apache.slider.api.StateValues.STATE_SUBMITTED;
+import static org.apache.slider.api.ResourceKeys.*;
+import static org.apache.slider.api.RoleKeys.*;
+import static org.apache.slider.api.StateValues.*;
 
 
 /**
@@ -214,7 +200,6 @@ public class AppState {
 
   /**
    *   Count of failed containers
-
    */
   private final Counter failedContainerCount = new Counter();
 
@@ -244,21 +229,21 @@ public class AppState {
    * resources, etc. When container started callback is received,
    * the node is promoted from here to the containerMap
    */
-  private final Map<ContainerId, RoleInstance> startingNodes =
+  private final Map<ContainerId, RoleInstance> startingContainers =
     new ConcurrentHashMap<>();
 
   /**
    * List of completed nodes. This isn't kept in the CD as it gets too
    * big for the RPC responses. Indeed, we should think about how deep to get this
    */
-  private final Map<ContainerId, RoleInstance> completedNodes
+  private final Map<ContainerId, RoleInstance> completedContainers
     = new ConcurrentHashMap<>();
 
   /**
    * Nodes that failed to start.
    * Again, kept out of the CD
    */
-  private final Map<ContainerId, RoleInstance> failedNodes =
+  private final Map<ContainerId, RoleInstance> failedContainers =
     new ConcurrentHashMap<>();
 
   /**
@@ -324,8 +309,7 @@ public class AppState {
 
   private void register(String name, Counter counter) {
     this.metricsAndMonitoring.getMetrics().register(
-        MetricRegistry.name(AppState.class,
-            name), counter);
+        MetricRegistry.name(AppState.class, name), counter);
   }
 
   public long getFailedCountainerCount() {
@@ -382,21 +366,20 @@ public class AppState {
     return rolePriorityMap;
   }
 
-  private Map<ContainerId, RoleInstance> getStartingNodes() {
-    return startingNodes;
+  private Map<ContainerId, RoleInstance> getStartingContainers() {
+    return startingContainers;
   }
 
-  private Map<ContainerId, RoleInstance> getCompletedNodes() {
-    return completedNodes;
+  private Map<ContainerId, RoleInstance> getCompletedContainers() {
+    return completedContainers;
   }
 
 
-  public Map<ContainerId, RoleInstance> getFailedNodes() {
-    return failedNodes;
+  public Map<ContainerId, RoleInstance> getFailedContainers() {
+    return failedContainers;
   }
 
-
-  public Map<ContainerId, RoleInstance> getLiveNodes() {
+  public Map<ContainerId, RoleInstance> getLiveContainers() {
     return liveNodes;
   }
 
@@ -592,8 +575,9 @@ public class AppState {
     initClusterStatus();
 
 
-    // add the roles
+    // set up the role history
     roleHistory = new RoleHistory(providerRoles);
+    roleHistory.register(metricsAndMonitoring);
     roleHistory.onStart(fs, historyDir);
 
     //rebuild any live containers
@@ -871,11 +855,11 @@ public class AppState {
     RoleInstance am = new RoleInstance(container);
     am.role = SliderKeys.COMPONENT_AM;
     am.roleId = SliderKeys.ROLE_AM_PRIORITY_INDEX;
-    am.createTime = System.currentTimeMillis();
-    am.startTime = System.currentTimeMillis();
+    am.createTime =now();
+    am.startTime = am.createTime;
     appMasterNode = am;
     //it is also added to the set of live nodes
-    getLiveNodes().put(containerId, am);
+    getLiveContainers().put(containerId, am);
     putOwnedContainer(containerId, am);
     
     // patch up the role status
@@ -894,7 +878,7 @@ public class AppState {
    * and it will just cause confusion
    */
   public void noteAMLaunched() {
-    getLiveNodes().put(appMasterNode.getContainerId(), appMasterNode);
+    getLiveContainers().put(appMasterNode.getContainerId(), appMasterNode);
   }
 
   /**
@@ -1002,7 +986,7 @@ public class AppState {
    */
   public synchronized List<RoleInstance> cloneLiveContainerInfoList() {
     List<RoleInstance> allRoleInstances;
-    Collection<RoleInstance> values = getLiveNodes().values();
+    Collection<RoleInstance> values = getLiveContainers().values();
     allRoleInstances = new ArrayList<>(values);
     return allRoleInstances;
   }
@@ -1015,7 +999,7 @@ public class AppState {
    */
   public synchronized RoleInstance getLiveInstanceByContainerID(String containerId)
       throws NoSuchNodeException {
-    Collection<RoleInstance> nodes = getLiveNodes().values();
+    Collection<RoleInstance> nodes = getLiveContainers().values();
     return findNodeInCollection(containerId, nodes);
   }
 
@@ -1062,7 +1046,7 @@ public class AppState {
     //first, a hashmap of those containerIDs is built up
     Set<String> uuidSet = new HashSet<String>(containerIDs);
     List<RoleInstance> nodes = new ArrayList<RoleInstance>(uuidSet.size());
-    Collection<RoleInstance> clusterNodes = getLiveNodes().values();
+    Collection<RoleInstance> clusterNodes = getLiveContainers().values();
 
     for (RoleInstance node : clusterNodes) {
       if (uuidSet.contains(node.id)) {
@@ -1080,7 +1064,7 @@ public class AppState {
    */
   public synchronized List<RoleInstance> enumLiveNodesInRole(String role) {
     List<RoleInstance> nodes = new ArrayList<RoleInstance>();
-    Collection<RoleInstance> allRoleInstances = getLiveNodes().values();
+    Collection<RoleInstance> allRoleInstances = getLiveContainers().values();
     for (RoleInstance node : allRoleInstances) {
       if (role.isEmpty() || role.equals(node.role)) {
         nodes.add(node);
@@ -1117,7 +1101,7 @@ public class AppState {
    */
   private synchronized Map<String, List<String>> createRoleToInstanceMap() {
     Map<String, List<String>> map = new HashMap<String, List<String>>();
-    for (RoleInstance node : getLiveNodes().values()) {
+    for (RoleInstance node : getLiveContainers().values()) {
       List<String> containers = map.get(node.role);
       if (containers == null) {
         containers = new ArrayList<String>();
@@ -1136,7 +1120,7 @@ public class AppState {
    */
   public synchronized Map<String, Map<String, ClusterNode>> createRoleToClusterNodeMap() {
     Map<String, Map<String, ClusterNode>> map = new HashMap<>();
-    for (RoleInstance node : getLiveNodes().values()) {
+    for (RoleInstance node : getLiveContainers().values()) {
       
       Map<String, ClusterNode> containers = map.get(node.role);
       if (containers == null) {
@@ -1160,7 +1144,7 @@ public class AppState {
     instance.state = STATE_SUBMITTED;
     instance.container = container;
     instance.createTime = now();
-    getStartingNodes().put(container.getId(), instance);
+    getStartingContainers().put(container.getId(), instance);
     putOwnedContainer(container.getId(), instance);
     roleHistory.onContainerStartSubmitted(container, instance);
   }
@@ -1354,7 +1338,7 @@ public class AppState {
       throw new RuntimeException(
         "Unknown role for node " + node);
     }
-    getLiveNodes().put(node.getContainerId(), node);
+    getLiveContainers().put(node.getContainerId(), node);
     //tell role history
     roleHistory.onContainerStarted(container);
   }
@@ -1395,7 +1379,7 @@ public class AppState {
                                      instance);
     }
     instance.startTime = now();
-    RoleInstance starting = getStartingNodes().remove(containerId);
+    RoleInstance starting = getStartingContainers().remove(containerId);
     if (null == starting) {
       throw new YarnRuntimeException(
         "Container "+ containerId +"%s is already started");
@@ -1423,7 +1407,7 @@ public class AppState {
     removeOwnedContainer(containerId);
     incFailedCountainerCount();
     incStartFailedCountainerCount();
-    RoleInstance instance = getStartingNodes().remove(containerId);
+    RoleInstance instance = getStartingContainers().remove(containerId);
     if (null != instance) {
       RoleStatus roleStatus = lookupRoleStatus(instance.roleId);
       String text;
@@ -1434,7 +1418,7 @@ public class AppState {
       }
       instance.diagnostics = text;
       roleStatus.noteFailed(true, text, ContainerOutcome.Failed);
-      getFailedNodes().put(containerId, instance);
+      getFailedContainers().put(containerId, instance);
       roleHistory.onNodeManagerContainerStartFailed(instance.container);
     }
   }
@@ -1547,11 +1531,11 @@ public class AppState {
       if (roleInstance != null) {
         //it was active, move it to failed 
         incFailedCountainerCount();
-        failedNodes.put(containerId, roleInstance);
+        failedContainers.put(containerId, roleInstance);
       } else {
         // the container may have been noted as failed already, so look
         // it up
-        roleInstance = failedNodes.get(containerId);
+        roleInstance = failedContainers.get(containerId);
       }
       if (roleInstance != null) {
         int roleId = roleInstance.roleId;
@@ -1605,12 +1589,12 @@ public class AppState {
     //remove the node
     ContainerId id = status.getContainerId();
     log.info("Removing node ID {}", id);
-    RoleInstance node = getLiveNodes().remove(id);
+    RoleInstance node = getLiveContainers().remove(id);
     if (node != null) {
       node.state = STATE_DESTROYED;
       node.exitCode = exitStatus;
       node.diagnostics = status.getDiagnostics();
-      getCompletedNodes().put(id, node);
+      getCompletedContainers().put(id, node);
       result.roleInstance = node;
     } else {
       // not in the list
@@ -1624,7 +1608,7 @@ public class AppState {
     // finally, verify the node doesn't exist any more
     assert !containersBeingReleased.containsKey(
         containerId) : "container still in release queue";
-    assert !getLiveNodes().containsKey(
+    assert !getLiveContainers().containsKey(
         containerId) : " container still in live nodes";
     assert getOwnedContainer(containerId) ==
            null : "Container still in active container list";

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
index 0508579..82b2f2a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/ProviderAppState.java
@@ -25,6 +25,7 @@ import org.apache.slider.api.ClusterDescription;
 import org.apache.slider.api.ClusterNode;
 import org.apache.slider.api.types.ApplicationLivenessInformation;
 import org.apache.slider.api.types.ComponentInformation;
+import org.apache.slider.api.types.NodeInformation;
 import org.apache.slider.core.conf.AggregateConf;
 import org.apache.slider.core.conf.ConfTreeOperations;
 import org.apache.slider.core.exceptions.NoSuchNodeException;
@@ -118,13 +119,13 @@ public class ProviderAppState implements StateAccessForProviders {
 
 
   @Override
-  public Map<ContainerId, RoleInstance> getFailedNodes() {
-    return appState.getFailedNodes();
+  public Map<ContainerId, RoleInstance> getFailedContainers() {
+    return appState.getFailedContainers();
   }
 
   @Override
-  public Map<ContainerId, RoleInstance> getLiveNodes() {
-    return appState.getLiveNodes();
+  public Map<ContainerId, RoleInstance> getLiveContainers() {
+    return appState.getLiveContainers();
   }
 
   @Override
@@ -249,10 +250,10 @@ public class ProviderAppState implements StateAccessForProviders {
   }
 
   @Override
-  public List<RoleInstance> enumLiveNodesInRole(String role) {
-    List<RoleInstance> nodes = new ArrayList<RoleInstance>();
+  public List<RoleInstance> enumLiveInstancesInRole(String role) {
+    List<RoleInstance> nodes = new ArrayList<>();
     Collection<RoleInstance> allRoleInstances = cloneLiveContainerInfoList();
-        getLiveNodes().values();
+        getLiveContainers().values();
     for (RoleInstance node : allRoleInstances) {
       if (role.isEmpty() || role.equals(node.role)) {
         nodes.add(node);
@@ -265,8 +266,7 @@ public class ProviderAppState implements StateAccessForProviders {
   public List<RoleInstance> lookupRoleContainers(String component) {
     RoleStatus roleStatus = lookupRoleStatus(component);
     List<RoleInstance> ownedContainerList = cloneOwnedContainerList();
-    List<RoleInstance> matching =
-        new ArrayList<>(ownedContainerList.size());
+    List<RoleInstance> matching = new ArrayList<>(ownedContainerList.size());
     int roleId = roleStatus.getPriority();
     for (RoleInstance instance : ownedContainerList) {
       if (instance.roleId == roleId) {
@@ -286,7 +286,16 @@ public class ProviderAppState implements StateAccessForProviders {
       info.containers.add(container.id);
     }
     return info;
+  }
 
+  @Override
+  public Map<String, NodeInformation> getNodeInformationSnapshot() {
+    return appState.getRoleHistory().getNodeInformationSnapshot();
+  }
+
+  @Override
+  public NodeInformation getNodeInformation(String hostname) {
+    return appState.getRoleHistory().getNodeInformation(hostname);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index f1e26bb..c22d517 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -26,9 +26,14 @@ import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.client.api.AMRMClient;
+import org.apache.slider.api.types.NodeInformation;
 import org.apache.slider.common.tools.SliderUtils;
 import org.apache.slider.core.exceptions.BadConfigException;
 import org.apache.slider.providers.ProviderRole;
+import org.apache.slider.server.appmaster.management.BoolMetric;
+import org.apache.slider.server.appmaster.management.LongGauge;
+import org.apache.slider.server.appmaster.management.MetricsAndMonitoring;
+import org.apache.slider.server.appmaster.management.Timestamp;
 import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
 import org.apache.slider.server.avro.LoadedRoleHistory;
 import org.apache.slider.server.avro.NodeEntryRecord;
@@ -69,18 +74,25 @@ public class RoleHistory {
   private long startTime;
 
   /** Time when saved */
-  private long saveTime;
+  private final Timestamp saveTime = new Timestamp(0);
 
   /** If the history was loaded, the time at which the history was saved */
-  private long thawedDataTime;
+  private final Timestamp thawedDataTime = new Timestamp(0);
   
   private NodeMap nodemap;
   private int roleSize;
-  private boolean dirty;
+  private final BoolMetric dirty = new BoolMetric(false);
   private FileSystem filesystem;
   private Path historyPath;
   private RoleHistoryWriter historyWriter = new RoleHistoryWriter();
 
+  /**
+   * Time of the most recent {@link #onNodesUpdated(List)} call.
+   * Zero means the nodes have never been updated.
+   */
+  private final Timestamp nodesUpdatedTime = new Timestamp(0);
+  private final BoolMetric nodeUpdateReceived = new BoolMetric(false);
+
   private OutstandingRequestTracker outstandingRequests =
     new OutstandingRequestTracker();
 
@@ -97,8 +109,7 @@ public class RoleHistory {
   private Set<String> failedNodes = new HashSet<>();
 
 
-  public RoleHistory(List<ProviderRole> providerRoles) throws
-                                                       BadConfigException {
+  public RoleHistory(List<ProviderRole> providerRoles) throws BadConfigException {
     this.providerRoles = providerRoles;
     roleSize = providerRoles.size();
     reset();
@@ -106,15 +117,16 @@ public class RoleHistory {
 
   /**
    * Reset the variables -this does not adjust the fixed attributes
-   * of the history
+   * of the history, but the nodemap and failed node set are cleared.
    */
   protected synchronized void reset() throws BadConfigException {
 
     nodemap = new NodeMap(roleSize);
+    failedNodes = new HashSet<>();
     resetAvailableNodeLists();
 
     outstandingRequests = new OutstandingRequestTracker();
-    
+
     Map<Integer, RoleStatus> roleStats = new HashMap<>();
     for (ProviderRole providerRole : providerRoles) {
       checkProviderRole(roleStats, providerRole);
@@ -122,6 +134,18 @@ public class RoleHistory {
   }
 
   /**
+   * Register all the role history metrics with the metrics infrastructure.
+   * @param metrics metrics and monitoring instance to register with
+   */
+  public void register(MetricsAndMonitoring metrics) {
+    metrics.register(RoleHistory.class, dirty, "dirty");
+    metrics.register(RoleHistory.class, nodesUpdatedTime, "nodes-updated.time");
+    metrics.register(RoleHistory.class, nodeUpdateReceived, "nodes-updated.flag");
+    metrics.register(RoleHistory.class, thawedDataTime, "thawed.time");
+    metrics.register(RoleHistory.class, saveTime, "saved.time");
+  }
+
+  /**
    * safety check: make sure the provider role is unique amongst
    * the role stats...which is extended with the new role
    * @param roleStats role stats
@@ -145,7 +169,6 @@ public class RoleHistory {
     roleStats.put(index, new RoleStatus(providerRole));
   }
 
-
   /**
    * Add a new provider role to the map
    * @param providerRole new provider role
@@ -187,8 +210,7 @@ public class RoleHistory {
   }
 
   /**
-   * Reset the variables -this does not adjust the fixed attributes
-   * of the history.
+   * Prepare the history for re-reading its state.
    * <p>
    * This intended for use by the RoleWriter logic.
    * @throws BadConfigException if there is a problem rebuilding the state
@@ -243,21 +265,20 @@ public class RoleHistory {
     return discarded;
   }
 
-
   public synchronized long getStartTime() {
     return startTime;
   }
 
   public synchronized long getSaveTime() {
-    return saveTime;
+    return saveTime.get();
   }
 
   public long getThawedDataTime() {
-    return thawedDataTime;
+    return thawedDataTime.get();
   }
 
   public void setThawedDataTime(long thawedDataTime) {
-    this.thawedDataTime = thawedDataTime;
+    this.thawedDataTime.set(thawedDataTime);
   }
 
   public synchronized int getRoleSize() {
@@ -273,11 +294,11 @@ public class RoleHistory {
   }
 
   public synchronized boolean isDirty() {
-    return dirty;
+    return dirty.get();
   }
 
   public synchronized void setDirty(boolean dirty) {
-    this.dirty = dirty;
+    this.dirty.set(dirty);
   }
 
   /**
@@ -285,8 +306,8 @@ public class RoleHistory {
    * @param timestamp timestamp -updates the savetime field
    */
   public synchronized void saved(long timestamp) {
-    dirty = false;
-    saveTime = timestamp;
+    setDirty(false);
+    saveTime.set(timestamp);
   }
 
   /**
@@ -299,6 +320,29 @@ public class RoleHistory {
   }
 
   /**
+   * Get snapshot of the node map
+   * @return a snapshot of the current node state
+   */
+  public Map<String, NodeInformation> getNodeInformationSnapshot() {
+    NodeMap map = cloneNodemap();
+    Map<String, NodeInformation> result = new HashMap<>(map.size());
+    for (Map.Entry<String, NodeInstance> entry : map.entrySet()) {
+      result.put(entry.getKey(), entry.getValue().serialize());
+    }
+    return result;
+  }
+
+  /**
+   * Get the information on a node
+   * @param hostname hostname
+   * @return the information about that host, or null if there is none
+   */
+  public NodeInformation getNodeInformation(String hostname) {
+    NodeInstance nodeInstance = nodemap.get(hostname);
+    return nodeInstance != null ? nodeInstance.serialize() : null;
+  }
+
+  /**
    * Get the node instance for the specific node -creating it if needed
    * @param hostname node address
    * @return the instance
@@ -379,8 +423,7 @@ public class RoleHistory {
    */
   public synchronized Path saveHistoryIfDirty() throws IOException {
     if (isDirty()) {
-      long time = now();
-      return saveHistory(time);
+      return saveHistory(now());
     } else {
       return null;
     }
@@ -403,7 +446,7 @@ public class RoleHistory {
     }
   
   /**
-   * Handler for bootstrap event
+   * Handler for bootstrap event: there was no history to thaw
    */
   public void onBootstrap() {
     log.debug("Role history bootstrapped");
@@ -746,12 +789,22 @@ public class RoleHistory {
   }
 
   /**
+   * Get the last time the nodes were updated from YARN
+   * @return the update time or zero if never updated.
+   */
+  public long getNodesUpdatedTime() {
+    return nodesUpdatedTime.get();
+  }
+
+  /**
    * Update failedNodes and nodemap based on the node state
    * 
    * @param updatedNodes list of updated nodes
    */
   public synchronized void onNodesUpdated(List<NodeReport> updatedNodes) {
     log.debug("Updating {} nodes", updatedNodes.size());
+    nodesUpdatedTime.set(now());
+    nodeUpdateReceived.set(true);
     for (NodeReport updatedNode : updatedNodes) {
       String hostname = updatedNode.getNodeId() == null
                         ? null 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
index 56c7cac..2fc00b2 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/StateAccessForProviders.java
@@ -26,6 +26,7 @@ import org.apache.slider.api.ClusterNode;
 import org.apache.slider.api.StatusKeys;
 import org.apache.slider.api.types.ApplicationLivenessInformation;
 import org.apache.slider.api.types.ComponentInformation;
+import org.apache.slider.api.types.NodeInformation;
 import org.apache.slider.core.conf.AggregateConf;
 import org.apache.slider.core.conf.ConfTreeOperations;
 import org.apache.slider.core.exceptions.NoSuchNodeException;
@@ -87,17 +88,17 @@ public interface StateAccessForProviders {
   List<String> listConfigSets();
 
   /**
-   * Get a map of all the failed nodes
-   * @return map of recorded failed notes
+   * Get a map of all the failed containers
+   * @return map of recorded failed containers
    */
-  Map<ContainerId, RoleInstance> getFailedNodes();
+  Map<ContainerId, RoleInstance> getFailedContainers();
 
   /**
-   * Get the live nodes.
+   * Get the live containers.
    * 
    * @return the live nodes
    */
-  Map<ContainerId, RoleInstance> getLiveNodes();
+  Map<ContainerId, RoleInstance> getLiveContainers();
 
   /**
    * Get the current cluster description 
@@ -268,11 +269,11 @@ public interface StateAccessForProviders {
   Map<String, Map<String, ClusterNode>> getRoleClusterNodeMapping();
 
   /**
-   * Enum all nodes by role. 
+   * Enum all role instances by role.
    * @param role role, or "" for all roles
-   * @return a list of nodes, may be empty
+   * @return a list of instances, may be empty
    */
-  List<RoleInstance> enumLiveNodesInRole(String role);
+  List<RoleInstance> enumLiveInstancesInRole(String role);
 
   /**
    * Look up all containers of a specific component name 
@@ -287,4 +288,19 @@ public interface StateAccessForProviders {
    * @return a structure describing the component.
    */
   ComponentInformation getComponentInformation(String component);
+
+
+  /**
+   * Get a snapshot of the node map.
+   * Each hostname maps to the serialized information for that node.
+   * @return a possibly empty map of hostname to node information
+   */
+  Map<String, NodeInformation> getNodeInformationSnapshot();
+
+  /**
+   * Get information on a node.
+   * @param hostname hostname to look up
+   * @return the information, or null if there is no information held.
+   */
+  NodeInformation getNodeInformation(String hostname);
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
index 0730a21..a0fe310 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/WebAppApiImpl.java
@@ -89,8 +89,7 @@ public class WebAppApiImpl implements WebAppApi {
   @Override
   public Map<String,RoleStatus> getRoleStatusByName() {
     List<RoleStatus> roleStatuses = appState.cloneRoleStatusList();
-    Map<String, RoleStatus> map =
-        new TreeMap<String, RoleStatus>();
+    Map<String, RoleStatus> map = new TreeMap<>();
     for (RoleStatus status : roleStatuses) {
       map.put(status.getName(), status);
     }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/RestPaths.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/RestPaths.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/RestPaths.java
index 06b7ba2..424107c 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/RestPaths.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/RestPaths.java
@@ -116,7 +116,6 @@ public class RestPaths {
    */
   public static final String SYSTEM_THREADS = SYSTEM + "/threads";
 
-
   /**
    * application subpath
    */
@@ -134,7 +133,7 @@ public class RestPaths {
   public static final String LIVE_RESOURCES = "/live/resources";
   public static final String LIVE_CONTAINERS = "/live/containers";
   public static final String LIVE_COMPONENTS = "/live/components";
-  public static final String LIVE_NODES = "/live/";
+  public static final String LIVE_NODES = "/live/nodes";
   public static final String LIVE_LIVENESS = "/live/liveness";
   public static final String LIVE_STATISTICS = "/live/statistics";
   public static final String MODEL = "/model";

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResouceContentCacheFactory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResouceContentCacheFactory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResouceContentCacheFactory.java
index e7b8fc7..2facf16 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResouceContentCacheFactory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResouceContentCacheFactory.java
@@ -18,10 +18,6 @@
 
 package org.apache.slider.server.appmaster.web.rest.application;
 
-import org.apache.slider.api.types.ComponentInformation;
-import org.apache.slider.api.types.ContainerInformation;
-import org.apache.slider.core.conf.AggregateConf;
-import org.apache.slider.core.conf.ConfTree;
 import org.apache.slider.server.appmaster.state.StateAccessForProviders;
 import org.apache.slider.server.appmaster.web.rest.application.resources.AggregateModelRefresher;
 import org.apache.slider.server.appmaster.web.rest.application.resources.AppconfRefresher;
@@ -29,63 +25,39 @@ import org.apache.slider.server.appmaster.web.rest.application.resources.CachedC
 import org.apache.slider.server.appmaster.web.rest.application.resources.ContentCache;
 import org.apache.slider.server.appmaster.web.rest.application.resources.LiveComponentsRefresher;
 import org.apache.slider.server.appmaster.web.rest.application.resources.LiveContainersRefresher;
+import org.apache.slider.server.appmaster.web.rest.application.resources.LiveNodesRefresher;
 import org.apache.slider.server.appmaster.web.rest.application.resources.LiveResourcesRefresher;
 import org.apache.slider.server.appmaster.web.rest.application.resources.LiveStatisticsRefresher;
 
-import java.util.Map;
-
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.LIVE_COMPONENTS;
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.LIVE_CONTAINERS;
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.LIVE_RESOURCES;
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.LIVE_STATISTICS;
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.MODEL_DESIRED;
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.MODEL_DESIRED_APPCONF;
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.MODEL_DESIRED_RESOURCES;
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.MODEL_RESOLVED;
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.MODEL_RESOLVED_APPCONF;
-import static org.apache.slider.server.appmaster.web.rest.RestPaths.MODEL_RESOLVED_RESOURCES;
+import static org.apache.slider.server.appmaster.web.rest.RestPaths.*;
 
 public class ApplicationResouceContentCacheFactory {
   public static final int LIFESPAN = 500;
 
   /**
    * Build the content cache
-   * @param cache cache to construct
    * @param state state view
    */
   public static ContentCache createContentCache(
       StateAccessForProviders state) {
     ContentCache cache = new ContentCache();
-    cache.put(LIVE_RESOURCES,
-        new CachedContent<ConfTree>(LIFESPAN,
-            new LiveResourcesRefresher(state)));
-    cache.put(LIVE_CONTAINERS,
-        new CachedContent<Map<String, ContainerInformation>>(LIFESPAN,
-            new LiveContainersRefresher(state)));
-    cache.put(LIVE_COMPONENTS,
-        new CachedContent<Map<String, ComponentInformation>>(LIFESPAN,
-            new LiveComponentsRefresher(state)));
+    cache.put(LIVE_RESOURCES, new CachedContent<>(LIFESPAN, new LiveResourcesRefresher(state)));
+    cache.put(LIVE_CONTAINERS, new CachedContent<>(LIFESPAN, new LiveContainersRefresher(state)));
+    cache.put(LIVE_COMPONENTS, new CachedContent<>(LIFESPAN, new LiveComponentsRefresher(state)));
+    cache.put(LIVE_NODES, new CachedContent<>(LIFESPAN, new LiveNodesRefresher(state)));
     cache.put(MODEL_DESIRED,
-        new CachedContent<AggregateConf>(LIFESPAN,
-            new AggregateModelRefresher(state, false)));
+        new CachedContent<>(LIFESPAN, new AggregateModelRefresher(state, false)));
     cache.put(MODEL_RESOLVED,
-        new CachedContent<AggregateConf>(LIFESPAN,
-            new AggregateModelRefresher(state, true)));
+        new CachedContent<>(LIFESPAN, new AggregateModelRefresher(state, true)));
     cache.put(MODEL_RESOLVED_APPCONF,
-        new CachedContent<ConfTree>(LIFESPAN,
-            new AppconfRefresher(state, false, false)));
+        new CachedContent<>(LIFESPAN, new AppconfRefresher(state, false, false)));
     cache.put(MODEL_RESOLVED_RESOURCES,
-        new CachedContent<ConfTree>(LIFESPAN,
-            new AppconfRefresher(state, false, true)));
+        new CachedContent<>(LIFESPAN, new AppconfRefresher(state, false, true)));
     cache.put(MODEL_DESIRED_APPCONF,
-        new CachedContent<ConfTree>(LIFESPAN,
-            new AppconfRefresher(state, true, false)));
+        new CachedContent<>(LIFESPAN, new AppconfRefresher(state, true, false)));
     cache.put(MODEL_DESIRED_RESOURCES,
-        new CachedContent<ConfTree>(LIFESPAN,
-            new AppconfRefresher(state, true, true)));
-    cache.put(LIVE_STATISTICS,
-        new CachedContent<Map<String, Integer>>(LIFESPAN,
-            new LiveStatisticsRefresher(state)));
+        new CachedContent<>(LIFESPAN, new AppconfRefresher(state, true, true)));
+    cache.put(LIVE_STATISTICS, new CachedContent<>(LIFESPAN, new LiveStatisticsRefresher(state)));
     return cache;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResource.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResource.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResource.java
index 383bc5a..1b54a31 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResource.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/ApplicationResource.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.yarn.webapp.NotFoundException;
 import org.apache.slider.api.types.ApplicationLivenessInformation;
 import org.apache.slider.api.types.ComponentInformation;
 import org.apache.slider.api.types.ContainerInformation;
+import org.apache.slider.api.types.NodeInformation;
 import org.apache.slider.core.conf.AggregateConf;
 import org.apache.slider.core.conf.ConfTree;
 import org.apache.slider.core.exceptions.NoSuchNodeException;
@@ -285,8 +286,7 @@ public class ApplicationResource extends AbstractSliderResource {
   public Map<String, ComponentInformation> getLiveComponents() {
     markGet(SLIDER_SUBPATH_APPLICATION, LIVE_COMPONENTS);
     try {
-      return (Map<String, ComponentInformation>) cache.lookup(
-          LIVE_COMPONENTS);
+      return (Map<String, ComponentInformation>) cache.lookup(LIVE_COMPONENTS);
     } catch (Exception e) {
       throw buildException(LIVE_COMPONENTS, e);
     }
@@ -346,6 +346,38 @@ TODO: decide what structure to return here, then implement
   }
 */
 
+  /** Map of node information for the live nodes, served from the content cache. */
+  @GET
+  @Path(LIVE_NODES)
+  @Produces({APPLICATION_JSON})
+  public Map<String, NodeInformation> getLiveNodes() {
+    markGet(SLIDER_SUBPATH_APPLICATION, LIVE_NODES); // tag the GET with this endpoint's path
+    try {
+      return (Map<String, NodeInformation>) cache.lookup(LIVE_NODES);
+    } catch (Exception e) {
+      throw buildException(LIVE_NODES, e); // report the failure against the nodes path
+    }
+  }
+
+  @GET // fetch the information for the single node named by the {node} path segment
+  @Path(LIVE_NODES + "/{node}")
+  @Produces({APPLICATION_JSON})
+  public NodeInformation getLiveNode(@PathParam("node") String node) {
+    markGet(SLIDER_SUBPATH_APPLICATION, LIVE_NODES); // tag the GET with this endpoint's path
+    try {
+      NodeInformation ni = state.getNodeInformation(node); // direct state lookup, not the cache
+      if (ni != null) {
+        return ni;
+      } else {
+        throw new NotFoundException("Unknown node: " + node);
+      }
+    } catch (NotFoundException e) {
+      throw e; // pass the not-found through without rewrapping it
+    } catch (Exception e) {
+      throw buildException(LIVE_NODES + "/" + node, e); // error path names the node resource
+    }
+  }
+
   /**
    * Statistics of the application
    * @return snapshot statistics

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/resources/LiveNodesRefresher.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/resources/LiveNodesRefresher.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/resources/LiveNodesRefresher.java
new file mode 100644
index 0000000..d4ab8fe
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/web/rest/application/resources/LiveNodesRefresher.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.web.rest.application.resources;
+
+import org.apache.slider.api.types.NodeInformation;
+import org.apache.slider.server.appmaster.state.StateAccessForProviders;
+
+import java.util.Map;
+
+/**
+ * Update the live nodes map
+ */
+public class LiveNodesRefresher
+    implements ResourceRefresher<Map<String, NodeInformation>> {
+
+  private final StateAccessForProviders state;
+
+  public LiveNodesRefresher(StateAccessForProviders state) {
+    this.state = state;
+  }
+
+  @Override
+  public Map<String, NodeInformation> refresh() {
+    return state.getNodeInformationSnapshot();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/proto/SliderClusterMessages.proto
----------------------------------------------------------------------
diff --git a/slider-core/src/main/proto/SliderClusterMessages.proto b/slider-core/src/main/proto/SliderClusterMessages.proto
index 8287f36..50c10e4 100644
--- a/slider-core/src/main/proto/SliderClusterMessages.proto
+++ b/slider-core/src/main/proto/SliderClusterMessages.proto
@@ -384,3 +384,15 @@ message GetCertificateStoreRequestProto {
 message GetCertificateStoreResponseProto {
   required bytes store = 1;
 }
+
+message GetLiveNodesRequestProto {
+}
+
+message GetLiveNodesResponseProto {
+  repeated string names = 1;
+  repeated NodeInformationProto nodes = 2;
+}
+
+message GetLiveNodeRequestProto {
+  required string name = 1;
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/main/proto/SliderClusterProtocol.proto
----------------------------------------------------------------------
diff --git a/slider-core/src/main/proto/SliderClusterProtocol.proto b/slider-core/src/main/proto/SliderClusterProtocol.proto
index aa59bb4..4221b1d 100644
--- a/slider-core/src/main/proto/SliderClusterProtocol.proto
+++ b/slider-core/src/main/proto/SliderClusterProtocol.proto
@@ -129,18 +129,24 @@ service SliderClusterProtocolPB {
 
   rpc getLivenessInformation(GetApplicationLivenessRequestProto) 
     returns(ApplicationLivenessInformationProto);
- 
+
   rpc getLiveContainers(GetLiveContainersRequestProto) 
     returns(GetLiveContainersResponseProto);
 
   rpc getLiveContainer(GetLiveContainerRequestProto) 
     returns(ContainerInformationProto);
-  
+
   rpc getLiveComponents(GetLiveComponentsRequestProto) 
     returns(GetLiveComponentsResponseProto);
-  
+
   rpc getLiveComponent(GetLiveComponentRequestProto) 
     returns(ComponentInformationProto);
+
+  rpc getLiveNodes(GetLiveNodesRequestProto)
+    returns(GetLiveNodesResponseProto);
+
+  rpc getLiveNode(GetLiveNodeRequestProto)
+    returns(NodeInformationProto);
   
 // AggregateConf getModelDesired()
   rpc getModelDesired(EmptyPayloadProto) 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/test/groovy/org/apache/slider/agent/rest/AbstractAppApiTestDelegates.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/rest/AbstractAppApiTestDelegates.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/rest/AbstractAppApiTestDelegates.groovy
index 58eb49e..6d1bcfc 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/rest/AbstractAppApiTestDelegates.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/rest/AbstractAppApiTestDelegates.groovy
@@ -24,8 +24,10 @@ import org.apache.slider.api.SliderApplicationApi
 import org.apache.slider.api.StateValues
 import org.apache.slider.api.types.ComponentInformation
 import org.apache.slider.api.types.ContainerInformation
+import org.apache.slider.api.types.NodeInformation
 import org.apache.slider.core.conf.ConfTreeOperations
 import org.apache.slider.test.Outcome
+import org.junit.Test
 
 import static org.apache.slider.api.ResourceKeys.*
 import static org.apache.slider.api.StatusKeys.*
@@ -203,6 +205,17 @@ public abstract class AbstractAppApiTestDelegates extends AbstractRestTestDelega
     assert !liveness.requestsOutstanding
   }
 
+
+  public void testListNodes() throws Throwable {
+    describe "Node listing via $appAPI"
+    def liveNodes = appAPI.liveNodes
+    assert liveNodes.size() > 0
+    def h = liveNodes.keySet()[0];
+    def localhost = appAPI.getLiveNode(h)
+
+
+  }
+
   /**
    * Probe that spins until the liveness query fails
    * @param args argument map
@@ -226,6 +239,7 @@ public abstract class AbstractAppApiTestDelegates extends AbstractRestTestDelega
     testLiveContainers();
     testRESTModel()
     testAppLiveness()
+//    testListNodes();
   }
 
   public void testFlexOperation() {
@@ -263,9 +277,10 @@ public abstract class AbstractAppApiTestDelegates extends AbstractRestTestDelega
     String key = args["key"]
     String val  = args["val"]
     def resolved = appAPI.getResolvedResources()
-    return Outcome.fromBool(resolved.get(key)==val)
+    return Outcome.fromBool(resolved.get(key) == val)
   }
 
+
   /**
    * Get the resolved value and push that out as the new state
    * 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/test/groovy/org/apache/slider/agent/rest/IpcApiClientTestDelegates.java
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/rest/IpcApiClientTestDelegates.java b/slider-core/src/test/groovy/org/apache/slider/agent/rest/IpcApiClientTestDelegates.java
index 9411c3c..f339f6d 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/rest/IpcApiClientTestDelegates.java
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/rest/IpcApiClientTestDelegates.java
@@ -25,5 +25,5 @@ public class IpcApiClientTestDelegates extends AbstractAppApiTestDelegates {
   public IpcApiClientTestDelegates(SliderApplicationApi appAPI) {
     super(true, appAPI);
   }
-  
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/test/groovy/org/apache/slider/agent/rest/LowLevelRestTestDelegates.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/rest/LowLevelRestTestDelegates.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/rest/LowLevelRestTestDelegates.groovy
index c0f123d..3d87c28 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/rest/LowLevelRestTestDelegates.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/rest/LowLevelRestTestDelegates.groovy
@@ -59,14 +59,14 @@ class LowLevelRestTestDelegates extends AbstractRestTestDelegate {
     application = appendToURL(appmaster, SLIDER_PATH_APPLICATION)
   }
 
-
   public void testCodahaleOperations() throws Throwable {
     describe "Codahale operations  $this"
     getWebPage(appmaster)
     getWebPage(appmaster, SYSTEM_THREADS)
     getWebPage(appmaster, SYSTEM_HEALTHCHECK)
     getWebPage(appmaster, SYSTEM_PING)
-    getWebPage(appmaster, SYSTEM_METRICS_JSON)
+    def page = getWebPage(appmaster, SYSTEM_METRICS_JSON)
+    validateCodahaleJson(parseMetrics(page))
   }
   
   public void logCodahaleMetrics() {
@@ -78,7 +78,7 @@ class LowLevelRestTestDelegates extends AbstractRestTestDelegate {
 
   public void testMimeTypes() throws Throwable {
     describe "Mime Types  $this"
-    HttpOperationResponse response= executeGet(
+    HttpOperationResponse response = executeGet(
         appendToURL(appmaster,
         SLIDER_PATH_APPLICATION, LIVE_RESOURCES))
     response.headers.each { key, val -> log.info("$key $val")}

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/test/groovy/org/apache/slider/agent/rest/TestStandaloneREST.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/agent/rest/TestStandaloneREST.groovy b/slider-core/src/test/groovy/org/apache/slider/agent/rest/TestStandaloneREST.groovy
index 7e10d30..880d9ca 100644
--- a/slider-core/src/test/groovy/org/apache/slider/agent/rest/TestStandaloneREST.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/agent/rest/TestStandaloneREST.groovy
@@ -18,16 +18,8 @@
 
 package org.apache.slider.agent.rest
 
-import com.sun.jersey.api.client.Client
-import com.sun.jersey.api.client.config.ClientConfig
-import com.sun.jersey.api.json.JSONConfiguration
-import com.sun.jersey.client.apache.ApacheHttpClient
-import com.sun.jersey.client.apache.ApacheHttpClientHandler
-import com.sun.jersey.client.apache.config.DefaultApacheHttpClientConfig
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
-import org.apache.commons.httpclient.HttpClient
-import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager
 import org.apache.hadoop.registry.client.api.RegistryOperations
 import org.apache.hadoop.yarn.api.records.ApplicationReport
 import org.apache.slider.agent.AgentMiniClusterTestBase
@@ -42,8 +34,7 @@ import org.apache.slider.core.restclient.HttpOperationResponse
 import org.apache.slider.server.appmaster.rpc.RpcBinder
 import org.junit.Test
 
-import static org.apache.slider.server.appmaster.management.MetricsKeys.METRICS_LOGGING_ENABLED
-import static org.apache.slider.server.appmaster.management.MetricsKeys.METRICS_LOGGING_LOG_INTERVAL
+import static org.apache.slider.server.appmaster.management.MetricsKeys.*
 import static org.apache.slider.server.appmaster.web.rest.RestPaths.*
 
 @CompileStatic
@@ -79,7 +70,6 @@ class TestStandaloneREST extends AgentMiniClusterTestBase {
     // set up url config to match
     initHttpTestSupport(launcher.configuration)
 
-
     execOperation(WEB_STARTUP_TIME) {
       GET(directAM)
     }
@@ -88,7 +78,7 @@ class TestStandaloneREST extends AgentMiniClusterTestBase {
       def metrics = GET(directAM, SYSTEM_METRICS)
       log.info metrics
     }
-    
+
     GET(proxyAM)
 
     log.info GET(proxyAM, SYSTEM_PING)
@@ -96,15 +86,21 @@ class TestStandaloneREST extends AgentMiniClusterTestBase {
     log.info GET(proxyAM, SYSTEM_HEALTHCHECK)
     log.info GET(proxyAM, SYSTEM_METRICS_JSON)
 
-    /*
-    Is the back door required? If so, don't test complex verbs via the proxy
-    */
+    // using the metrics, await the first node status update
+    execOperation(WEB_STARTUP_TIME) {
+      def metrics = getMetrics(proxyAM)
+      if (!getGaugeAsBool(metrics,
+            "org.apache.slider.server.appmaster.state.RoleHistory.nodes-updated.flag", false)) {
+        throw new IOException("Nodes not updated in $metrics")
+      } else {
+        "true"
+      };
+    }
+    // Is the back door required? If so, don't test complex verbs via the proxy
     def proxyComplexVerbs = !SliderXmlConfKeys.X_DEV_INSECURE_REQUIRED
 
-    /*
-     * Only do direct complex verbs if the no back door is needed, or if
-     * it is enabled
-     */
+    //  Only do direct complex verbs if no back door is needed, or if
+    //  it is enabled
     def directComplexVerbs = proxyComplexVerbs || SLIDER_CONFIG.getBoolean(
         SliderXmlConfKeys.X_DEV_INSECURE_WS,
         SliderXmlConfKeys.X_DEV_INSECURE_DEFAULT)
@@ -117,14 +113,12 @@ class TestStandaloneREST extends AgentMiniClusterTestBase {
     log.info "Content type: ${response.contentType}"
 
     describe "proxied response headers from AM Web resources"
-    response = executeGet(appendToURL(proxyAM,
-        SLIDER_PATH_APPLICATION, LIVE_RESOURCES))
+    response = executeGet(appendToURL(proxyAM, SLIDER_PATH_APPLICATION, LIVE_RESOURCES))
     response.headers.each { key, val -> log.info("$key $val") }
     log.info "Content type: ${response.contentType}"
 
-    
     def ugiClient = createUGIJerseyClient();
-    
+
     describe "Proxy SliderRestClient Tests"
     RestAPIClientTestDelegates proxySliderRestAPI =
         new RestAPIClientTestDelegates(proxyAM, ugiClient, proxyComplexVerbs)
@@ -134,8 +128,7 @@ class TestStandaloneREST extends AgentMiniClusterTestBase {
     RestAPIClientTestDelegates directSliderRestAPI =
         new RestAPIClientTestDelegates(directAM, ugiClient, directComplexVerbs)
     directSliderRestAPI.testSuiteAll()
-    
-    
+
     describe "Proxy Jersey Tests"
     JerseyTestDelegates proxyJerseyTests =
         new JerseyTestDelegates(proxyAM, ugiClient, proxyComplexVerbs)
@@ -143,14 +136,12 @@ class TestStandaloneREST extends AgentMiniClusterTestBase {
 
     describe "Direct Jersey Tests"
 
-    JerseyTestDelegates directJerseyTests =
-        new JerseyTestDelegates(directAM, ugiClient)
+    JerseyTestDelegates directJerseyTests = new JerseyTestDelegates(directAM, ugiClient)
     directJerseyTests.testSuiteAll()
 
     describe "Direct Tests"
 
-    LowLevelRestTestDelegates direct =
-        new LowLevelRestTestDelegates(directAM, directComplexVerbs)
+    LowLevelRestTestDelegates direct = new LowLevelRestTestDelegates(directAM, directComplexVerbs)
     direct.testSuiteAll()
 
     describe "Proxy Tests"
@@ -176,13 +167,10 @@ class TestStandaloneREST extends AgentMiniClusterTestBase {
 
     describe( "IPC equivalent operations")
     def sliderClusterProtocol = RpcBinder.getProxy(conf, report, 1000)
-    SliderApplicationIpcClient ipcClient =
-        new SliderApplicationIpcClient(sliderClusterProtocol)
-    IpcApiClientTestDelegates ipcDelegates =
-        new IpcApiClientTestDelegates(ipcClient)
+    SliderApplicationIpcClient ipcClient = new SliderApplicationIpcClient(sliderClusterProtocol)
+    IpcApiClientTestDelegates ipcDelegates = new IpcApiClientTestDelegates(ipcClient)
     ipcDelegates.testSuiteAll()
-    
-    
+
     // log the metrics to show what's up
     direct.logCodahaleMetrics();
 
@@ -193,23 +181,4 @@ class TestStandaloneREST extends AgentMiniClusterTestBase {
     }
   }
 
-  /**
-   * Create Jersey client with URL handling by way
-   * of the Apache HttpClient classes. 
-   * @return a Jersey client
-   */
-  public static Client createJerseyClientHttpClient() {
-
-    def httpclient = new HttpClient(new MultiThreadedHttpConnectionManager());
-    httpclient.httpConnectionManager.params.connectionTimeout = 10000;
-    ClientConfig clientConfig = new DefaultApacheHttpClientConfig();
-    clientConfig.features[JSONConfiguration.FEATURE_POJO_MAPPING] = Boolean.TRUE;
-
-    def handler = new ApacheHttpClientHandler(httpclient, clientConfig);
-
-    def client = new ApacheHttpClient(handler)
-    client.followRedirects = true
-    return client;
-  }
- 
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAppRestIntegration.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAppRestIntegration.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAppRestIntegration.groovy
index 7838886..d36fdbc 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAppRestIntegration.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/appstate/TestMockAppStateAppRestIntegration.groovy
@@ -50,7 +50,6 @@ class TestMockAppStateAppRestIntegration extends BaseMockAppStateTest implements
   public void testCachedIntDocument() throws Throwable {
     ContentCache cache = new ContentCache()
 
-
     def refresher = new IntRefresher()
     assert 0 == refresher.count
     def entry = new CachedContentManagedTimer(refresher)
@@ -103,8 +102,8 @@ class TestMockAppStateAppRestIntegration extends BaseMockAppStateTest implements
         new ApplicationResource(webAppApi)
     def containers = applicationResource.liveContainers
     assert containers.size() == instances.size()
-    
   }
+
   /**
    * Get a state accessor for the appState field
    * @return something to hand down to refreshers and resources
@@ -146,7 +145,7 @@ class TestMockAppStateAppRestIntegration extends BaseMockAppStateTest implements
 
   class CachedContentManagedTimer extends CachedContent {
     int time = 0;
-        
+
     @Override
     protected long now() {
       return time++;

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy
index 82192b9..ec202b0 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/monkey/TestMockMonkey.groovy
@@ -148,7 +148,7 @@ class TestMockMonkey extends BaseMockAppStateTest {
   public void testContainerKillerIgnoresAM() throws Throwable {
 
     addAppMastertoAppState()
-    assert 1 == appState.liveNodes.size()
+    assert 1 == appState.liveContainers.size()
     
     def chaos = new ChaosKillContainer(appState,
         queues,

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
index 29f0510..e301e6f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/web/rest/publisher/TestPublisherRestResources.groovy
@@ -107,7 +107,7 @@ class TestPublisherRestResources extends AgentTestBase {
     webResource = client.resource(sliderConfigset + "dummy-site");
 
 
-    execOperation(30000) {
+    execOperation(WEB_STARTUP_TIME) {
       GET(sliderConfigset)
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3aeab9ca/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
index 9d14815..c5808f2 100644
--- a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestUtils.groovy
@@ -24,6 +24,7 @@ import com.sun.jersey.api.client.config.DefaultClientConfig
 import com.sun.jersey.api.json.JSONConfiguration
 import com.sun.jersey.client.urlconnection.URLConnectionClientHandler
 import groovy.json.JsonOutput
+import groovy.json.JsonSlurper
 import groovy.transform.CompileStatic
 import groovy.util.logging.Slf4j
 import org.apache.commons.httpclient.HttpClient
@@ -73,6 +74,7 @@ import javax.ws.rs.core.HttpHeaders
 import java.util.concurrent.TimeoutException
 
 import static Arguments.ARG_OPTION
+import static org.apache.slider.server.appmaster.web.rest.RestPaths.SYSTEM_METRICS_JSON
 
 /**
  * Static utils for tests in this package and in other test projects.
@@ -655,7 +657,6 @@ class SliderTestUtils extends Assert {
         clientConfig);
   }
 
-
   /**
    * Create a jersey client config with the settings needed for tests
    * (e.g. POJO mappings)
@@ -707,7 +708,7 @@ class SliderTestUtils extends Assert {
            HttpCacheHeaders.HTTP_HEADER_CACHE_CONTROL_NONE
   }
 
-/**
+  /**
    * Assert that a service operation succeeded
    * @param service service
    */
@@ -738,6 +739,7 @@ class SliderTestUtils extends Assert {
     int actual = instances != null ? instances.size() : 0
     return actual
   }
+
   /**
    * Exec a set of commands, wait a few seconds for it to finish.
    * @param status code
@@ -752,11 +754,12 @@ class SliderTestUtils extends Assert {
     assert status == exitCode
     return process
   }
+
   /**
-     * Exec a set of commands, wait a few seconds for it to finish.
-     * @param commands
-     * @return
-     */
+   * Exec a set of commands, wait a few seconds for it to finish.
+   * @param commands
+   * @return
+   */
   public static ForkedProcessService exec(List<String> commands) {
     ForkedProcessService process;
     process = new ForkedProcessService(
@@ -1380,4 +1383,42 @@ class SliderTestUtils extends Assert {
     assert list.size() == entries.size()
     assert entries.containsAll(list)
   }
+
+  public Map parseMetrics(String metrics) {
+    new JsonSlurper().parse(metrics.bytes) as Map
+  }
+
+  public void validateCodahaleJson(Map metricsMap) {
+    assert metricsMap["version"] == "3.0.0"
+    assert metricsMap["gauges"] instanceof Map
+    assert metricsMap["histograms"] instanceof Map
+    assert metricsMap["timers"] instanceof Map
+  }
+
+  public int getGaugeValue(Map metricsMap, String gauge, int defVal) {
+    def entry = metricsMap["gauges"][gauge]
+    if (entry != null) {
+      return entry["value"] as int
+    } else {
+      return defVal
+    }
+  }
+
+  public boolean getGaugeAsBool(Map metricsMap, String gauge, boolean defVal) {
+    return 0 !=  getGaugeValue(metricsMap, gauge, defVal ? 1 : 0)
+  }
+
+  /**
+   * Fetch and parse the JSON codahale metrics under a path
+   * @param baseUrl base path
+   * @return the fetched, parsed and partially validated JSON mapping
+   */
+  public Map getMetrics(String baseUrl) {
+    def raw = GET(baseUrl, SYSTEM_METRICS_JSON)
+    def metrics = parseMetrics(raw)
+    validateCodahaleJson(metrics)
+    return metrics;
+  }
+
+
 }