You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by st...@apache.org on 2015/03/26 19:01:57 UTC

[2/4] incubator-slider git commit: SLIDER-832 scanning for available nodes to keep failed nodes on list - just skip them. Tests to show this & strict placement logic

SLIDER-832 scanning for available nodes to keep failed nodes on list - just skip them. Tests to show this & strict placement logic


Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/fc7f7364
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/fc7f7364
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/fc7f7364

Branch: refs/heads/develop
Commit: fc7f7364e732c86127e28d0853bf5e4d3e4d6bc9
Parents: 63e2b80
Author: Steve Loughran <st...@apache.org>
Authored: Thu Mar 26 15:11:36 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Thu Mar 26 15:11:36 2015 +0000

----------------------------------------------------------------------
 .../server/appmaster/state/RoleHistory.java     | 26 ++++----
 .../TestRoleHistoryRequestTracking.groovy       | 64 ++++++++++++++++----
 .../model/mock/BaseMockAppStateTest.groovy      | 17 +++++-
 .../appmaster/model/mock/MockFactory.groovy     |  2 +-
 4 files changed, 85 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fc7f7364/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index 98cf4e4..c2a741c 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -488,30 +488,34 @@ public class RoleHistory {
     int roleId = role.getKey();
     boolean strictPlacement = role.isStrictPlacement();
     NodeInstance nodeInstance = null;
-    // get the list of possible targets
+    // Get the list of possible targets.
+    // This is a live list: changes here are preserved
     List<NodeInstance> targets = getNodesForRoleId(roleId);
     if (targets == null) {
-      // add an empty list here for ease downstream
-      targets = new ArrayList<>(0);
+      // nothing to allocate on
+      return null;
     }
+
     int cnt = targets.size();
     log.debug("There are {} node(s) to consider for {}", cnt, role.getName());
-    // spin until there's a candidate
-    while (!targets.isEmpty() && nodeInstance == null) {
-      NodeInstance head = targets.remove(0);
-      if (head.getActiveRoleInstances(roleId) == 0) {
+    for (int i = 0; i < cnt  && nodeInstance == null; i++) {
+      NodeInstance candidate = targets.get(i);
+      if (candidate.getActiveRoleInstances(roleId) == 0) {
         // no active instances: check failure statistics
-        if (strictPlacement || !head.exceedsFailureThreshold(role)) {
-          nodeInstance = head;
+        if (strictPlacement || !candidate.exceedsFailureThreshold(role)) {
+          targets.remove(i);
+          // exit criteria for loop is now met
+          nodeInstance = candidate;
         } else {
           // too many failures for this node
           log.info("Recent node failures is higher than threshold {}. Not requesting host {}",
-              role.getNodeFailureThreshold(), head.hostname);
+              role.getNodeFailureThreshold(), candidate.hostname);
         }
       }
     }
+
     if (nodeInstance == null) {
-      log.info("No historical node found for {}", role.getName());
+      log.info("No node found for {}", role.getName());
     }
     return nodeInstance;
   }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fc7f7364/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
index 82750a3..9847992 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryRequestTracking.groovy
@@ -18,14 +18,17 @@
 
 package org.apache.slider.server.appmaster.model.history
 
+import groovy.util.logging.Slf4j
 import org.apache.hadoop.yarn.api.records.Container
 import org.apache.hadoop.yarn.api.records.Resource
 import org.apache.hadoop.yarn.client.api.AMRMClient
+import org.apache.slider.providers.PlacementPolicy
 import org.apache.slider.providers.ProviderRole
 import org.apache.slider.server.appmaster.model.mock.BaseMockAppStateTest
 import org.apache.slider.server.appmaster.model.mock.MockContainer
 import org.apache.slider.server.appmaster.model.mock.MockFactory
 import org.apache.slider.server.appmaster.state.ContainerAllocationOutcome
+import org.apache.slider.server.appmaster.state.NodeEntry
 import org.apache.slider.server.appmaster.state.NodeInstance
 import org.apache.slider.server.appmaster.state.OutstandingRequest
 import org.apache.slider.server.appmaster.state.RoleHistory
@@ -37,15 +40,16 @@ import org.junit.Test
  * Test the RH availability list and request tracking: that hosts
  * get removed and added 
  */
+@Slf4j
 class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
 
   String roleName = "test"
 
   NodeInstance age1Active4 = nodeInstance(1, 4, 0, 0)
   NodeInstance age2Active2 = nodeInstance(2, 2, 0, 1)
+  NodeInstance age2Active0 = nodeInstance(2, 0, 0, 0)
   NodeInstance age3Active0 = nodeInstance(3, 0, 0, 0)
   NodeInstance age4Active1 = nodeInstance(4, 1, 0, 0)
-  NodeInstance age2Active0 = nodeInstance(2, 0, 0, 0)
   NodeInstance empty = new NodeInstance("empty", MockFactory.ROLE_COUNT)
 
   List<NodeInstance> nodes = [age2Active2, age2Active0, age4Active1, age1Active4, age3Active0]
@@ -87,24 +91,34 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
 
   @Test
   public void testRequestedNodeOffListWithFailures() throws Throwable {
+    assert 0 == roleStatus.key
+    assert !roleHistory.cloneAvailableList(0).isEmpty()
+
+    NodeEntry age3role0 = recordAsFailed(age3Active0, 0, 4)
+    assert age3Active0.isConsideredUnreliable(0, roleStatus.nodeFailureThreshold)
+    recordAsFailed(age2Active0, 0, 4)
+    assert age2Active0.isConsideredUnreliable(0, roleStatus.nodeFailureThreshold)
+    // expect to get a null node back
     NodeInstance ni = roleHistory.findNodeForNewInstance(roleStatus)
-    assert age3Active0 == ni
+    assert !ni
+
+    // which is translated to a no-location request
     AMRMClient.ContainerRequest req = roleHistory.requestInstanceOnNode(ni,
         roleStatus,
         resource,
         "")
-    assert 1 == req.nodes.size()
-    List<NodeInstance> a2 = roleHistory.cloneAvailableList(0)
-    assertListEquals([age2Active0], a2)
 
-    age3Active0.get(0).failedRecently = 4
-    req = roleHistory.requestInstanceOnNode(ni,
-        roleStatus,
-        resource,
-        "")
     assertNull(req.nodes)
 
-    age3Active0.get(0).failedRecently = 0
+    log.info "resetting failure count"
+    age3role0.resetFailedRecently()
+    roleHistory.dump()
+    assert 0 == age3role0.failedRecently
+    assert !age3Active0.isConsideredUnreliable(0, roleStatus.nodeFailureThreshold)
+    assert !roleHistory.cloneAvailableList(0).isEmpty()
+    // looking for a node should now find one
+    ni = roleHistory.findNodeForNewInstance(roleStatus)
+    assert ni == age3Active0
     req = roleHistory.requestInstanceOnNode(ni,
         roleStatus,
         resource,
@@ -113,6 +127,34 @@ class TestRoleHistoryRequestTracking extends BaseMockAppStateTest {
   }
 
   @Test
+  public void testStrictPlacementIgnoresFailures() throws Throwable {
+
+    def targetRole = role1Status
+    final ProviderRole providerRole1 = targetRole.providerRole
+    assert providerRole1.placementPolicy == PlacementPolicy.STRICT
+    int key = targetRole.key
+
+    recordAsFailed(age1Active4, key, 4)
+    recordAsFailed(age2Active0, key, 4)
+    recordAsFailed(age2Active2, key, 4)
+    recordAsFailed(age3Active0, key, 4)
+    recordAsFailed(age4Active1, key, 4)
+
+    // trigger a list rebuild
+    roleHistory.buildAvailableNodeLists();
+
+    assert !roleHistory.cloneAvailableList(key).isEmpty()
+
+
+    NodeInstance ni = roleHistory.findNodeForNewInstance(targetRole)
+    assert ni == age4Active1!= null
+    // next lookup returns next node
+    ni = roleHistory.findNodeForNewInstance(roleStatus)
+    assert ni == age3Active0
+  }
+
+
+  @Test
   public void testFindAndRequestNode() throws Throwable {
     AMRMClient.ContainerRequest req = roleHistory.requestNode(roleStatus, resource)
 

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fc7f7364/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
index f30fce6..3e5494f 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/BaseMockAppStateTest.groovy
@@ -35,6 +35,7 @@ import org.apache.slider.core.main.LauncherExitCodes
 import org.apache.slider.server.appmaster.operations.AbstractRMOperation
 import org.apache.slider.server.appmaster.state.AppState
 import org.apache.slider.server.appmaster.state.ContainerAssignment
+import org.apache.slider.server.appmaster.state.NodeEntry
 import org.apache.slider.server.appmaster.state.NodeInstance
 import org.apache.slider.server.appmaster.state.RoleInstance
 import org.apache.slider.server.appmaster.state.RoleStatus
@@ -147,7 +148,7 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
 
 
   public NodeInstance nodeInstance(long age, int live0, int live1=0, int live2=0) {
-    NodeInstance ni = new NodeInstance("age${age}live[${live0},${live1},$live2]",
+    NodeInstance ni = new NodeInstance("age${age}-[${live0},${live1},$live2]",
                                        MockFactory.ROLE_COUNT)
     ni.getOrCreate(0).lastUsed = age
     ni.getOrCreate(0).live = live0;
@@ -333,4 +334,18 @@ abstract class BaseMockAppStateTest extends SliderTestBase implements MockRoles
     return cids
   }
 
+  /**
+   * Record a node as failing
+   * @param node
+   * @param id
+   * @param count
+   * @return the entry
+   */
+  public NodeEntry recordAsFailed(NodeInstance node, int id, int count) {
+    def entry = node.getOrCreate(id)
+    1.upto(count) {
+      entry.containerCompleted(false)
+    }
+    entry
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/fc7f7364/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockFactory.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockFactory.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockFactory.groovy
index 06bc10c..fca3376 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockFactory.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockFactory.groovy
@@ -184,7 +184,7 @@ class MockFactory implements MockRoles {
 
   def roleMap(int count) {
     return [
-        (ResourceKeys.COMPONENT_INSTANCES):count.toString(),
+        (ResourceKeys.COMPONENT_INSTANCES): count.toString(),
     ]
   }