You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by st...@apache.org on 2015/03/24 21:01:40 UTC
[08/20] incubator-slider git commit: SLIDER-799 escalation process
implemented; tests are TBD
SLIDER-799 escalation process implemented; tests are TBD
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/6d955af0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/6d955af0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/6d955af0
Branch: refs/heads/feature/SLIDER-799-AM-managed-relax
Commit: 6d955af0f4f6d02ec4f3080191a060464513f6b6
Parents: 559ed00
Author: Steve Loughran <st...@apache.org>
Authored: Mon Mar 16 20:08:51 2015 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Mon Mar 16 20:08:51 2015 +0000
----------------------------------------------------------------------
.../org/apache/slider/api/InternalKeys.java | 12 +++-
.../org/apache/slider/api/ResourceKeys.java | 2 +-
.../providers/AbstractProviderService.java | 5 ++
.../server/appmaster/SliderAppMaster.java | 47 +++++++++++++---
.../actions/EscalateOutstandingRequests.java | 22 ++++++++
.../operations/AsyncRMOperationHandler.java | 8 ++-
.../operations/CancelSingleRequest.java | 58 ++++++++++++++++++++
.../ProviderNotifyingOperationHandler.java | 5 ++
.../operations/RMOperationHandlerActions.java | 12 +++-
.../slider/server/appmaster/state/AppState.java | 33 ++++++++---
.../server/appmaster/state/NodeInstance.java | 29 ++++------
.../appmaster/state/OutstandingRequest.java | 11 +++-
.../state/OutstandingRequestTracker.java | 38 ++++++++++++-
.../server/appmaster/state/RoleHistory.java | 34 ++++++++----
.../history/TestRoleHistoryNIComparators.groovy | 1 -
.../model/mock/MockProviderService.groovy | 5 ++
.../model/mock/MockRMOperationHandler.groovy | 11 +++-
17 files changed, 280 insertions(+), 53 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java b/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
index b360fbe..38494a2 100644
--- a/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/api/InternalKeys.java
@@ -89,6 +89,7 @@ public interface InternalKeys {
* Default short life threshold: {@value}
*/
int DEFAULT_INTERNAL_CONTAINER_FAILURE_SHORTLIFE = 60;
+
/**
* Version of the app: {@value}
*/
@@ -177,5 +178,14 @@ public interface InternalKeys {
* 100% for chaos values
*/
int PROBABILITY_PERCENT_100 = 100 * PROBABILITY_PERCENT_1;
-
+
+ /**
+ * interval between checks for escalation: {@value}
+ */
+ String ESCALATION_CHECK_INTERVAL = "escalation.check.interval.seconds";
+
+ /**
+ * Default interval between escalation checks, in seconds: {@value}
+ */
+ int DEFAULT_ESCALATION_CHECK_INTERVAL = 30;
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java b/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
index ce2a54f..94ce681 100644
--- a/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
+++ b/slider-core/src/main/java/org/apache/slider/api/ResourceKeys.java
@@ -145,7 +145,7 @@ public interface ResourceKeys {
* Time in seconds to relax placement delay
*/
String PLACEMENT_RELAX_DELAY =
- "yarn.placement.relax.delay.seconds";
+ "yarn.placement.relax.seconds";
/**
* Time to have a strict placement policy outstanding before
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java b/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
index fd7df73..7cba840 100644
--- a/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
+++ b/slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java
@@ -405,6 +405,11 @@ public abstract class AbstractProviderService
}
@Override
+ public void cancelSingleRequest(AMRMClient.ContainerRequest request) {
+ // no-op
+ }
+
+ @Override
public int cancelContainerRequests(Priority priority1,
Priority priority2,
int count) {
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
index 34bf20c..a93c60b 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java
@@ -113,6 +113,7 @@ import org.apache.slider.providers.agent.AgentKeys;
import org.apache.slider.providers.slideram.SliderAMClientProvider;
import org.apache.slider.providers.slideram.SliderAMProviderService;
import org.apache.slider.server.appmaster.actions.ActionRegisterServiceInstance;
+import org.apache.slider.server.appmaster.actions.EscalateOutstandingRequests;
import org.apache.slider.server.appmaster.actions.RegisterComponentInstance;
import org.apache.slider.server.appmaster.actions.QueueExecutor;
import org.apache.slider.server.appmaster.actions.QueueService;
@@ -141,7 +142,6 @@ import org.apache.slider.server.appmaster.state.ProviderAppState;
import org.apache.slider.server.appmaster.operations.RMOperationHandler;
import org.apache.slider.server.appmaster.state.RoleInstance;
import org.apache.slider.server.appmaster.state.RoleStatus;
-import org.apache.slider.server.appmaster.state.SimpleReleaseSelector;
import org.apache.slider.server.appmaster.web.AgentService;
import org.apache.slider.server.appmaster.web.rest.InsecureAmFilterInitializer;
import org.apache.slider.server.appmaster.web.rest.agent.AgentWebApp;
@@ -918,7 +918,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
initCompleted.set(true);
scheduleFailureWindowResets(instanceDefinition.getResources());
-
+ scheduleEscalation(instanceDefinition.getInternal());
try {
@@ -1664,13 +1664,28 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
log.info(
"Scheduling the failure window reset interval to every {} seconds",
seconds);
- RenewingAction<ResetFailureWindow> renew = new RenewingAction<ResetFailureWindow>(
+ RenewingAction<ResetFailureWindow> renew = new RenewingAction<>(
reset, seconds, seconds, TimeUnit.SECONDS, 0);
actionQueues.renewing("failures", renew);
} else {
log.info("Failure window reset interval is not set");
}
}
+
+ /**
+ * Schedule the escalation action
+ * @param internal internal configuration tree whose global options supply the check interval
+ * @throws BadConfigException
+ */
+ private void scheduleEscalation(ConfTree internal) throws BadConfigException {
+ EscalateOutstandingRequests escalate = new EscalateOutstandingRequests();
+ ConfTreeOperations ops = new ConfTreeOperations(internal);
+ int seconds = ops.getGlobalOptions().getOptionInt(InternalKeys.ESCALATION_CHECK_INTERVAL,
+ InternalKeys.DEFAULT_ESCALATION_CHECK_INTERVAL);
+ RenewingAction<EscalateOutstandingRequests> renew = new RenewingAction<>(
+ escalate, seconds, seconds, TimeUnit.SECONDS, 0);
+ actionQueues.renewing("escalation", renew);
+ }
/**
* Look at where the current node state is -and whether it should be changed
@@ -1728,13 +1743,27 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
queue(new ActionStopSlider(e));
}
}
-
+
+ /**
+ * Escalate operation as triggered by external timer.
+ * <p>
+ * Gets the list of new operations from the application state, then executes them.
+ */
+ public void escalateOutstandingRequests() {
+ List<AbstractRMOperation> operations = appState.escalateOutstandingRequests();
+ providerRMOperationHandler.execute(operations);
+ execute(operations);
+ }
+
+
/**
* Shutdown operation: release all containers
*/
private void releaseAllContainers() {
+ List<AbstractRMOperation> operations = appState.releaseAllContainers();
+ providerRMOperationHandler.execute(operations);
//now apply the operations
- execute(appState.releaseAllContainers());
+ execute(operations);
}
/**
@@ -1808,8 +1837,12 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
return rmOperationHandler.cancelContainerRequests(priority1, priority2, count);
}
-
-/* =================================================================== */
+ @Override
+ public void cancelSingleRequest(AMRMClient.ContainerRequest request) {
+ rmOperationHandler.cancelSingleRequest(request);
+ }
+
+ /* =================================================================== */
/* END */
/* =================================================================== */
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/EscalateOutstandingRequests.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/EscalateOutstandingRequests.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/EscalateOutstandingRequests.java
new file mode 100644
index 0000000..e527be6
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/EscalateOutstandingRequests.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.actions;
+
+public class EscalateOutstandingRequests {
+}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
index 1cbb960..7c98551 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java
@@ -78,7 +78,7 @@ public class AsyncRMOperationHandler extends RMOperationHandler {
break;
}
// a single release
- client.removeContainerRequest(request);
+ cancelSingleRequest(request);
remaining --;
}
}
@@ -86,6 +86,12 @@ public class AsyncRMOperationHandler extends RMOperationHandler {
}
@Override
+ public void cancelSingleRequest(AMRMClient.ContainerRequest request) {
+ // withdraw exactly this one outstanding request from the client
+ client.removeContainerRequest(request);
+ }
+
+ @Override
public void releaseAssignedContainer(ContainerId containerId) {
log.debug("Releasing container {}", containerId);
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/CancelSingleRequest.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/CancelSingleRequest.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/CancelSingleRequest.java
new file mode 100644
index 0000000..e4ccb10
--- /dev/null
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/CancelSingleRequest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.slider.server.appmaster.operations;
+
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.slider.server.appmaster.state.ContainerPriority;
+
+/**
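+ * Cancel container requests. NOTE(review): despite the name, this takes two priorities and a count and delegates to cancelContainerRequests(); OutstandingRequestTracker constructs it with a single ContainerRequest, so confirm the intended signature.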
+ */
+public class CancelSingleRequest extends AbstractRMOperation {
+
+ private final Priority priority1;
+ private final Priority priority2;
+ private final int count;
+
+ public CancelSingleRequest(Priority priority1, Priority priority2, int count) {
+ this.priority1 = priority1;
+ this.priority2 = priority2;
+ this.count = count;
+ }
+
+ @Override
+ public void execute(RMOperationHandlerActions handler) {
+ handler.cancelContainerRequests(priority1, priority2, count);
+ }
+
+ @Override
+ public String toString() {
+ return "release " + count
+ + " requests for " + ContainerPriority.toString(priority1)
+ + " and " + ContainerPriority.toString(priority2);
+ }
+
+ /**
+ * Get the number to release
+ * @return the number of containers to release
+ */
+ public int getCount() {
+ return count;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
index 66df566..184a36a 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java
@@ -47,4 +47,9 @@ public class ProviderNotifyingOperationHandler extends RMOperationHandler {
int count) {
return providerService.cancelContainerRequests(priority1, priority2, count);
}
+
+ @Override
+ public void cancelSingleRequest(AMRMClient.ContainerRequest request) {
+ providerService.cancelSingleRequest(request);
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
index 97fde09..594ee47 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java
@@ -27,9 +27,19 @@ import java.util.List;
public interface RMOperationHandlerActions {
void releaseAssignedContainer(ContainerId containerId);
- void addContainerRequest(AMRMClient.ContainerRequest req);
+ /**
+ * Issue a container request
+ * @param request request to issue
+ */
+ void addContainerRequest(AMRMClient.ContainerRequest request);
/**
+ * Cancel a specific request
+ * @param request request to cancel
+ */
+ void cancelSingleRequest(AMRMClient.ContainerRequest request);
+
+ /**
* Remove a container request
* @param priority1 priority to remove at
* @param priority2 second priority to target
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
index f2c237c..a887107 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java
@@ -37,7 +37,6 @@ import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import static org.apache.slider.api.StateValues.*;
import org.apache.slider.api.ClusterDescription;
import org.apache.slider.api.ClusterDescriptionKeys;
import org.apache.slider.api.ClusterDescriptionOperations;
@@ -88,8 +87,20 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
-import static org.apache.slider.api.ResourceKeys.*;
-import static org.apache.slider.api.RoleKeys.*;
+import static org.apache.slider.api.ResourceKeys.DEF_YARN_CORES;
+import static org.apache.slider.api.ResourceKeys.DEF_YARN_LABEL_EXPRESSION;
+import static org.apache.slider.api.ResourceKeys.DEF_YARN_MEMORY;
+import static org.apache.slider.api.ResourceKeys.YARN_CORES;
+import static org.apache.slider.api.ResourceKeys.YARN_LABEL_EXPRESSION;
+import static org.apache.slider.api.ResourceKeys.YARN_MEMORY;
+import static org.apache.slider.api.RoleKeys.ROLE_FAILED_INSTANCES;
+import static org.apache.slider.api.RoleKeys.ROLE_FAILED_STARTING_INSTANCES;
+import static org.apache.slider.api.RoleKeys.ROLE_RELEASING_INSTANCES;
+import static org.apache.slider.api.RoleKeys.ROLE_REQUESTED_INSTANCES;
+import static org.apache.slider.api.StateValues.STATE_CREATED;
+import static org.apache.slider.api.StateValues.STATE_DESTROYED;
+import static org.apache.slider.api.StateValues.STATE_LIVE;
+import static org.apache.slider.api.StateValues.STATE_SUBMITTED;
/**
@@ -704,7 +715,7 @@ public class AppState {
clusterStatusTemplate =
ClusterDescriptionOperations.buildFromInstanceDefinition(
- instanceDefinition);
+ instanceDefinition);
// Add the -site configuration properties
@@ -1286,7 +1297,7 @@ public class AppState {
int maxVal) {
String val = resources.getComponentOpt(name, option,
- Integer.toString(defVal));
+ Integer.toString(defVal));
Integer intVal;
if (ResourceKeys.YARN_RESOURCE_MAX.equals(val)) {
intVal = maxVal;
@@ -1856,11 +1867,19 @@ public class AppState {
}
roleHistory.resetFailedRecently();
}
+
+ /**
+ * Escalate operation as triggered by external timer.
+ * @return a (usually empty) list of cancel/request operations.
+ */
+ public List<AbstractRMOperation> escalateOutstandingRequests() {
+ return roleHistory.escalateOutstandingRequests();
+ }
/**
* Look at the allocation status of one role, and trigger add/release
* actions if the number of desired role instances doesn't equal
- * (actual+pending).
+ * (actual + pending).
* <p>
* MUST be executed from within a synchronized method
* <p>
@@ -2046,7 +2065,7 @@ public class AppState {
Collection<RoleInstance> targets = cloneOwnedContainerList();
log.info("Releasing {} containers", targets.size());
List<AbstractRMOperation> operations =
- new ArrayList<AbstractRMOperation>(targets.size());
+ new ArrayList<>(targets.size());
for (RoleInstance instance : targets) {
if (instance.roleId == SliderKeys.ROLE_AM_PRIORITY_INDEX) {
// don't worry about the AM
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
index 71b74fc..68c8a15 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java
@@ -41,7 +41,7 @@ public class NodeInstance {
*/
public NodeInstance(String hostname, int roles) {
this.hostname = hostname;
- nodeEntries = new ArrayList<NodeEntry>(roles);
+ nodeEntries = new ArrayList<>(roles);
}
/**
@@ -103,12 +103,8 @@ public class NodeInstance {
*/
public boolean isConsideredUnreliable(int role, int threshold) {
NodeEntry entry = get(role);
-
- if (entry != null) {
- return entry.getFailedRecently() > threshold;
- } else {
- return false;
- }
+
+ return entry != null && entry.getFailedRecently() > threshold;
}
/**
@@ -131,7 +127,8 @@ public class NodeInstance {
/**
- * run through each entry; gc'ing & removing old ones
+ * run through each entry; gc'ing & removing old ones that don't have
+ * a recent failure count (we care about those)
* @param absoluteTime age in millis
* @return true if there are still entries left
*/
@@ -140,7 +137,7 @@ public class NodeInstance {
ListIterator<NodeEntry> entries = nodeEntries.listIterator();
while (entries.hasNext()) {
NodeEntry entry = entries.next();
- if (entry.notUsedSince(absoluteTime)) {
+ if (entry.notUsedSince(absoluteTime) && entry.getFailedRecently() == 0) {
entries.remove();
} else {
active = true;
@@ -203,13 +200,8 @@ public class NodeInstance {
/**
* A comparator for sorting entries where the node is preferred over another.
- * If there's no entry for an element then it's failure count is set to -1, age to 0
- * for the purposes of the comparison
- * <ol>
- * <li>Entry exists => end of list as unknown</li>
- * <li>Lowest failure count</li>
- * <li>Age</li>
- * </ol>
+ * <p>
+ * The exact algorithm may change
*
* @return +ve int if left is preferred to right; -ve if right over left, 0 for equal
*/
@@ -227,7 +219,8 @@ public class NodeInstance {
NodeEntry left = o1.get(role);
NodeEntry right = o2.get(role);
- // sort by failure count first
+/*
+ // sort by failure count
int failL = left != null ? left.getFailedRecently() : -1;
int failR = right != null ? right.getFailedRecently() : -1;
@@ -237,7 +230,7 @@ public class NodeInstance {
if (failR > failL) {
return -1;
}
-
+ */
// failure counts are equal: compare age
long ageL = left != null ? left.getLastUsed() : 0;
long ageR = right != null ? right.getLastUsed() : 0;
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
index f6b83a7..7ad3fbb 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequest.java
@@ -187,7 +187,7 @@ public final class OutstandingRequest {
* as the original one, and the same host, but: relaxed placement, and a changed priority
* so as to place it into the relaxed list.
*/
- public AMRMClient.ContainerRequest buildEscalatedContainerRequest() {
+ public AMRMClient.ContainerRequest escalate() {
escalated = true;
Preconditions.checkNotNull(issuedRequest, "issued request");
Priority pri = ContainerPriority.createPriority(roleId, true);
@@ -220,6 +220,15 @@ public final class OutstandingRequest {
}
/**
+ * Query to see if the request is ready to be escalated
+ * @param time time to check against
+ * @return true if escalation should begin
+ */
+ public boolean shouldEscalate(long time) {
+ return !escalated && escalationTimeoutMillis < time;
+ }
+
+ /**
* Equality is on hostname and role
* @param o other
* @return true on a match
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
index e197a86..eb58e74 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/OutstandingRequestTracker.java
@@ -19,6 +19,10 @@
package org.apache.slider.server.appmaster.state;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.client.api.AMRMClient;
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
+import org.apache.slider.server.appmaster.operations.CancelSingleRequest;
+import org.apache.slider.server.appmaster.operations.ContainerRequestOperation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -48,6 +52,11 @@ public class OutstandingRequestTracker {
protected static final Logger log =
LoggerFactory.getLogger(OutstandingRequestTracker.class);
+ /**
+ * no requests; saves creating a new list if not needed
+ */
+ private final List<AbstractRMOperation> NO_REQUESTS = new ArrayList<>(0);
+
private Map<OutstandingRequest, OutstandingRequest> placedRequests =
new HashMap<>();
@@ -122,10 +131,10 @@ public class OutstandingRequestTracker {
}
/**
- * Get the age of a container. If it is not known in the history,
+ * Get the age of the node hosting a container. If it is not known in the history,
* return 0.
* @param c container
- * @return age, null if
+ * @return age, or 0 if there's no entry for it.
*/
private long getAgeOf(Container c) {
long age = 0;
@@ -224,4 +233,29 @@ public class OutstandingRequestTracker {
public synchronized List<OutstandingRequest> listOutstandingRequests() {
return new ArrayList<>(placedRequests.values());
}
+
+ /**
+ * Escalate operation as triggered by external timer.
+ * @return a (usually empty) list of cancel/request operations.
+ */
+ public synchronized List<AbstractRMOperation> escalateOutstandingRequests(long now) {
+ if (placedRequests.isEmpty()) {
+ return NO_REQUESTS;
+ }
+
+ List<AbstractRMOperation> operations = new ArrayList<>();
+ for (OutstandingRequest outstandingRequest : placedRequests.values()) {
+ if (outstandingRequest.shouldEscalate(now)) {
+
+ // time to escalate
+ CancelSingleRequest cancel = new CancelSingleRequest(outstandingRequest.issuedRequest);
+ operations.add(cancel);
+ AMRMClient.ContainerRequest escalated =
+ outstandingRequest.escalate();
+ operations.add(new ContainerRequestOperation(escalated));
+ }
+
+ }
+ return operations;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
----------------------------------------------------------------------
diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
index acfe606..0d4de2d 100644
--- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
+++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java
@@ -18,17 +18,7 @@
package org.apache.slider.server.appmaster.state;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
+import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.Container;
@@ -38,12 +28,22 @@ import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.slider.common.tools.SliderUtils;
import org.apache.slider.core.exceptions.BadConfigException;
import org.apache.slider.providers.ProviderRole;
+import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
import org.apache.slider.server.avro.RoleHistoryHeader;
import org.apache.slider.server.avro.RoleHistoryWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
/**
* The Role History.
@@ -854,4 +854,14 @@ public class RoleHistory {
return lst;
}
+
+ /**
+ * Escalate operation as triggered by external timer.
+ * @return a (usually empty) list of cancel/request operations.
+ */
+ public List<AbstractRMOperation> escalateOutstandingRequests() {
+ long now = now();
+ return outstandingRequests.escalateOutstandingRequests(now);
+
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
index 74dfd42..b26b2f0 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/history/TestRoleHistoryNIComparators.groovy
@@ -63,7 +63,6 @@ class TestRoleHistoryNIComparators extends BaseMockAppStateTest {
def preferred = new NodeInstance.Preferred(0)
assert preferred.compare(age6failing, age1failing) == -1
assert preferred.compare(age1failing, age6failing) == 1
- assert preferred.compare(age1failing, age1failing) == 0
}
@Test
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
index f8ab56d..44415f4 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockProviderService.groovy
@@ -276,6 +276,11 @@ class MockProviderService implements ProviderService {
}
@Override
+ void cancelSingleRequest(AMRMClient.ContainerRequest request) {
+
+ }
+
+ @Override
void execute(List<AbstractRMOperation> operations) {
}
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/6d955af0/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
----------------------------------------------------------------------
diff --git a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
index 297c597..a68ce02 100644
--- a/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
+++ b/slider-core/src/test/groovy/org/apache/slider/server/appmaster/model/mock/MockRMOperationHandler.groovy
@@ -39,7 +39,7 @@ class MockRMOperationHandler extends RMOperationHandler {
@Override
public void releaseAssignedContainer(ContainerId containerId) {
operations.add(new ContainerReleaseOperation(containerId))
- log.info("Releasing container ID " + containerId.getId())
+ log.info("Releasing container ID " + containerId.containerId)
releases++;
}
@@ -61,6 +61,15 @@ class MockRMOperationHandler extends RMOperationHandler {
return releaseable;
}
+ @Override
+ void cancelSingleRequest(AMRMClient.ContainerRequest request) {
+ // here assume that there is a copy of this request in the list
+ if (availableToCancel > 0) {
+ availableToCancel--;
+ cancelled++;
+ }
+ }
+
/**
* clear the history
*/