You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2013/03/21 21:08:29 UTC

svn commit: r1459503 - /uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java

Author: degenaro
Date: Thu Mar 21 20:08:28 2013
New Revision: 1459503

URL: http://svn.apache.org/r1459503
Log:
UIMA-2767 DUCC orchestrator (CLI) health monitor should cancel job if init failure cap exceeded and procs assigned by RM == 0

Modified:
    uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java?rev=1459503&r1=1459502&r2=1459503&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java Thu Mar 21 20:08:28 2013
@@ -75,6 +75,26 @@ public class HealthMonitor {
 		return ckpt;
 	}
 	
+	/** Terminates the job (returning true) when a specified init failure cap is reached and no processes are alive. */
+	private boolean isCancelJobCappedWithNoJobProcesses(IDuccWorkJob job) {
+		String methodName = "isCancelJobCappedWithNoJobProcesses";
+		logger.trace(methodName, null, messages.fetch("enter"));
+		boolean ckpt = false;
+		long count = job.getProcessInitFailureCount();
+		long cap = job.getProcessInitFailureCap();
+		long procs = job.getAliveProcessCount();
+		logger.debug(methodName, null, "fail.count:"+count+" "+"fail.cap:"+cap+" "+"alive.procs:"+procs);
+		// cap <= 0 means no cap was specified (the service check in this class tests cap > 0); decide on the logged procs value.
+		if(cap > 0 && count >= cap && procs == 0) {
+			IRationale rationale = new Rationale("health monitor detected no resources assigned and job initialization failures cap reached:"+cap);
+			StateManager.getInstance().jobTerminate(job, JobCompletionType.ProcessInitializationFailure, rationale, ProcessDeallocationType.JobCanceled);
+			logger.info(methodName, job.getDuccId(), JobCompletionType.ProcessInitializationFailure);
+			ckpt = true;
+		}
+		logger.trace(methodName, null, messages.fetch("exit"));
+		return ckpt;
+	}
+	
 	private boolean isCancelJobExcessiveProcessFailures(IDuccWorkJob job) {
 		String methodName = "isCancelJobExcessiveProcessFailures";
 		logger.trace(methodName, null, messages.fetch("enter"));
@@ -188,6 +208,9 @@ public class HealthMonitor {
 					if(isCancelJobExcessiveProcessFailures(job)) {
 						ckpt = true;
 					}
+					else if(isCancelJobCappedWithNoJobProcesses(job)) {
+						ckpt = true;
+					}
 					else if(isCancelJobDriverProcessFailed(job)) {
 						ckpt = true;
 					}
@@ -238,6 +261,9 @@ public class HealthMonitor {
 					else if(isCancelJobExcessiveInitializationFailures(service)) {
 						ckpt = true;
 					}
+					else if(isCancelJobCappedWithNoJobProcesses(service)) {
+						ckpt = true;
+					}
 					long cap = service.getProcessInitFailureCap();
 					// if an initialization cap was specified
 					if(cap > 0) {