You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2013/03/21 21:08:29 UTC
svn commit: r1459503 -
/uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java
Author: degenaro
Date: Thu Mar 21 20:08:28 2013
New Revision: 1459503
URL: http://svn.apache.org/r1459503
Log:
UIMA-2767 DUCC orchestrator (CLI) health monitor should cancel job if init failure cap exceeded and procs assigned by RM == 0
Modified:
uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java?rev=1459503&r1=1459502&r2=1459503&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java Thu Mar 21 20:08:28 2013
@@ -75,6 +75,26 @@ public class HealthMonitor {
return ckpt;
}
+ /**
+  * Terminates the given job when its process initialization failure count
+  * has reached a specified cap and the job currently has no alive processes.
+  *
+  * @param job the job (or service) to examine
+  * @return true if termination was requested through the StateManager,
+  *         false otherwise
+  */
+ private boolean isCancelJobCappedWithNoJobProcesses(IDuccWorkJob job) {
+     String methodName = "isCancelJobCappedWithNoJobProcesses";
+     logger.trace(methodName, null, messages.fetch("enter"));
+     boolean ckpt = false;
+     long count = job.getProcessInitFailureCount();
+     long cap = job.getProcessInitFailureCap();
+     long procs = job.getAliveProcessCount();
+     logger.debug(methodName, null, "fail.count:"+count+" "+"fail.cap:"+cap+" "+"alive.procs:"+procs);
+     // A cap that is not positive is treated as "no cap specified" (the service
+     // health check in this class applies the same cap > 0 test); without this
+     // guard count >= cap would hold trivially and a job with no alive processes
+     // would be terminated even though no cap was ever reached.
+     if(cap > 0 && count >= cap) {
+         // Decide on the value sampled above so the decision always matches
+         // what was written to the debug log.
+         if(procs == 0) {
+             IRationale rationale = new Rationale("health monitor detected no resources assigned and job initialization failures cap reached:"+cap);
+             StateManager.getInstance().jobTerminate(job, JobCompletionType.ProcessInitializationFailure, rationale, ProcessDeallocationType.JobCanceled);
+             logger.info(methodName, job.getDuccId(), JobCompletionType.ProcessInitializationFailure);
+             ckpt = true;
+         }
+     }
+     logger.trace(methodName, null, messages.fetch("exit"));
+     return ckpt;
+ }
+
private boolean isCancelJobExcessiveProcessFailures(IDuccWorkJob job) {
String methodName = "isCancelJobExcessiveProcessFailures";
logger.trace(methodName, null, messages.fetch("enter"));
@@ -188,6 +208,9 @@ public class HealthMonitor {
if(isCancelJobExcessiveProcessFailures(job)) {
ckpt = true;
}
+ else if(isCancelJobCappedWithNoJobProcesses(job)) {
+ ckpt = true;
+ }
else if(isCancelJobDriverProcessFailed(job)) {
ckpt = true;
}
@@ -238,6 +261,9 @@ public class HealthMonitor {
else if(isCancelJobExcessiveInitializationFailures(service)) {
ckpt = true;
}
+ else if(isCancelJobCappedWithNoJobProcesses(service)) {
+ ckpt = true;
+ }
long cap = service.getProcessInitFailureCap();
// if an initialization cap was specified
if(cap > 0) {