You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ch...@apache.org on 2014/02/05 22:30:27 UTC
svn commit: r1564940 - in /uima/sandbox/uima-ducc/trunk:
src/main/resources/service_monitors/
uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/
uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/
Author: challngr
Date: Wed Feb 5 21:30:27 2014
New Revision: 1564940
URL: http://svn.apache.org/r1564940
Log:
UIMA-3405 Add failure monitoring to default UIMA-AS pinger.
Added:
uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external
Modified:
uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
Modified: uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing Wed Feb 5 21:30:27 2014
@@ -1,5 +1,5 @@
service_ping_class = org.apache.uima.ducc.cli.UimaAsPing
-service_ping_arguments=meta_timeout=15000,broker_jmx_port=${ducc.broker.jmx.port},queue_threshold=0,window=3,enable_log=false
+service_ping_arguments=meta-timeout=15000,broker-jmx-port=${ducc.broker.jmx.port},max-failures=5,failure-window=10,enable-log=false
#service_ping_jvmargs = whatever
#service_ping_classpath= whatever stuff, in addition to the defaults supplied by the sm
service_ping_dolog=False
Added: uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external?rev=1564940&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external (added)
+++ uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external Wed Feb 5 21:30:27 2014
@@ -0,0 +1,11 @@
+service_ping_class = org.apache.uima.ducc.cli.UimaAsPing
+service_ping_arguments=meta-timeout=15000,broker-jmx-port=${ducc.broker.jmx.port},max-failures=5,failure-window=30,enable-log=true
+#service_ping_jvmargs = whatever
+service_ping_classpath=${DUCC_HOME}/lib/uima/*:${DUCC_HOME}/activemq/*:${DUCC_HOME}/lib//springframework/*
+service_ping_dolog=true
+service_ping_timeout = 10000
+
+internal = false
+
+
+
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java Wed Feb 5 21:30:27 2014
@@ -30,6 +30,7 @@ import org.apache.uima.UIMAFramework;
import org.apache.uima.aae.client.UimaASProcessStatus;
import org.apache.uima.aae.client.UimaAsBaseCallbackListener;
import org.apache.uima.aae.client.UimaAsynchronousEngine;
+import org.apache.uima.adapter.jms.client.BaseUIMAAsynchronousEngineCommon_impl;
import org.apache.uima.adapter.jms.client.BaseUIMAAsynchronousEngine_impl;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.EntityProcessStatus;
@@ -39,15 +40,24 @@ import org.apache.uima.ducc.common.utils
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
-// 'q_thresh=nn,window=mm,broker_jmx=1100,meta_timeout=10000'
public class UimaAsPing
extends AServicePing
{
- int window = 3;
- int queue_threshold = 0;
-
String ep;
+ int failure_max = 5; // max consecutive run failures before reporting excessive failures
+ // which prevents restart of instances
+ int current_failures = 0; // current consecutive run failures
+ int consecutive_failures = 0; // n failures in consecutive pings
+ int failure_window_size = 15; // 15 minutes
+ int monitor_rate = 1; // ping rate, in minutes, min 1 used for calculations
+ int fail_index = 0;
+ int[] failure_window = null; // tracks consecutive failures within a window
+ int failure_cursor = 0;
+ long service_id = 0;
+
+ boolean excessive_failures = false;
+
String endpoint;
String broker;
int meta_timeout;
@@ -64,7 +74,7 @@ public class UimaAsPing
String pid;
boolean gmfail = false;
boolean enable_log = false;
-
+
public UimaAsPing()
{
}
@@ -94,16 +104,14 @@ public class UimaAsPing
// not needed here fyi broker_port = url.getPort();
- //UIMAFramework.getLogger(BaseUIMAAsynchronousEngineCommon_impl.class).setLevel(Level.OFF);
- //UIMAFramework.getLogger(BaseUIMAAsynchronousEngine_impl.class).setLevel(Level.OFF);
+ UIMAFramework.getLogger(BaseUIMAAsynchronousEngineCommon_impl.class).setLevel(Level.OFF);
+ UIMAFramework.getLogger(BaseUIMAAsynchronousEngine_impl.class).setLevel(Level.OFF);
// there are a couple junky messages that slip by the above configurations. turn the whole danged thing off.
UIMAFramework.getLogger().setLevel(Level.INFO);
if ( args == null ) {
meta_timeout = 5000;
broker_jmx_port = 1099;
- queue_threshold = 0;
- window = 3;
} else {
// 'max-failures=nn,failure-window=mm,broker-jmx-port=1100,meta-timeout=10000'
// turn the argument string into properties
@@ -118,15 +126,16 @@ public class UimaAsPing
// TODO Auto-generated catch block
e.printStackTrace();
}
- meta_timeout = props.getIntProperty("meta_timeout", 5000);
- broker_jmx_port = props.getIntProperty("broker_jmx_port", 1099);
- queue_threshold = props.getIntProperty("queue_threshold", 0);
- window = props.getIntProperty("window", 3);
- enable_log = props.getBooleanProperty("enable_log", false);
-
+ meta_timeout = props.getIntProperty ("meta-timeout" , 5000);
+ broker_jmx_port = props.getIntProperty ("broker-jmx-port", 1099);
+ enable_log = props.getBooleanProperty("enable-log" , false);
+ failure_max = props.getIntProperty ("max-failures" , failure_max);
+ failure_window_size = props.getIntProperty ("failure-window" , failure_window_size);
+ failure_window = new int[failure_window_size];
+ failure_cursor = 0;
}
- queueSizeWindow = new int[window];
- doLog("<ctr>", null, "INIT: meta_timeout", meta_timeout, "broker_jmx_port", broker_jmx_port, "queue_threshold", queue_threshold, "window", window);
+
+ doLog("<ctr>", null, "INIT: meta_timeout", meta_timeout, "broker-jmx-port", broker_jmx_port);
this.monitor = new UimaAsServiceMonitor(endpoint, broker_host, broker_jmx_port);
}
@@ -150,30 +159,65 @@ public class UimaAsPing
}
}
System.out.println(buf);
+ }
+ private String fmtArray(int[] array)
+ {
+ Object[] vals = new Object[array.length];
+ StringBuffer sb = new StringBuffer();
+
+ for ( int i = 0; i < array.length; i++ ) {
+ sb.append("%3s ");
+ vals[i] = Integer.toString(array[i]);
+ }
+ return String.format(sb.toString(), vals);
}
- void evaluateBrokerStatistics(IServiceStatistics stats)
+ void evaluateService(IServiceStatistics stats)
{
String methodName = "evaluatePing";
// Note that this particular pinger considers 'health' to be a function of whether
// the get-meta worked AND the queue statistics.
try {
monitor.collect();
+ stats.setHealthy(true); // this pinger defines 'healthy' as
+ // 'service responds to get-meta and broker returns jmx stats'
+
- if ( queue_threshold > 0 ) { // only do this if a threshold is set
- // if the last 'n' q depths are > threshold, mark the service unhealthy
- // primitive, but maybe an OK first guess
- queueSizeWindow[queueCursor++ % window] = (int)monitor.getQueueSize();
- int sum = 0;
- for ( int i = 0; i < window; i++ ) {
- sum += queueSizeWindow[i];
+ monitor_rate = Integer.parseInt(smState.getProperty("monitor-rate") ) / 60000; // convert to minutes
+ service_id = Long.parseLong(smState.getProperty("service-id"));
+ if (monitor_rate <= 0 ) monitor_rate = 1; // minimum 1 minute allowed
+
+ // Calculate total instance failures within some configured window. If we get a cluster
+ // of failures, signal excessive failures so SM stops spawning new ones.
+ int failures = Integer.parseInt(smState.getProperty("run-failures"));
+ doLog(methodName, "run-failures:", failures);
+ if ( (failure_window != null) && (failures > 0) ) {
+ int diff = failures - current_failures; // nfailures since last update
+ current_failures = failures;
+
+ if ( diff > 0 ) {
+ failure_window[failure_cursor++] = diff;
+ } else {
+ failure_window[failure_cursor++] = 0;
}
- sum = sum / window;
- stats.setHealthy( sum < queue_threshold ? true : false );
- doLog(methodName, null, "EVAL: Q depth", monitor.getQueueSize(), "window", sum, "health", stats.isHealthy());
- } else {
- stats.setHealthy(true);
+
+ doLog(methodName, "failures", failures, "current_failures", current_failures,
+ "failure_window", fmtArray(failure_window), "failure_cursor", failure_cursor);
+
+ failure_cursor = failure_cursor % failure_window_size;
+
+
+
+ int windowed_failures = 0;
+ excessive_failures = false;
+ for ( int i = 0; i < failure_window_size; i++ ) {
+ windowed_failures += failure_window[i];
+ }
+ if ( windowed_failures >= failure_max ) {
+ excessive_failures = true;
+ }
+ doLog(methodName, "windowed_failures", windowed_failures, "excessive_failures", excessive_failures);
}
} catch ( Throwable t ) {
@@ -182,6 +226,11 @@ public class UimaAsPing
}
}
+ public boolean isExcessiveFailures()
+ {
+ return excessive_failures;
+ }
+
public IServiceStatistics getStatistics()
{
String methodName = "getStatistics";
@@ -189,7 +238,7 @@ public class UimaAsPing
nodeIp = "N/A";
pid = "N/A";
- evaluateBrokerStatistics(statistics); // if we get here, the get-meta worked well enough
+ evaluateService(statistics); // if we get here, the get-meta worked well enough
// Instantiate Uima AS Client
BaseUIMAAsynchronousEngine_impl uimaAsEngine = new BaseUIMAAsynchronousEngine_impl();
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java Wed Feb 5 21:30:27 2014
@@ -215,7 +215,7 @@ class PingDriver
protected boolean resolveBooleanProperty(String prop, DuccProperties ping_props, DuccProperties job_props, boolean deflt)
{
String val = resolveStringProperty(prop, ping_props, job_props, Boolean.toString(deflt));
- return ( val.equalsIgnoreCase("t") || // must be t T true TRUE - all else is false
+ return ( val.equalsIgnoreCase("True") ||
val.equalsIgnoreCase("true") );
}
@@ -339,23 +339,23 @@ class PingDriver
{
if ( classpath == null ) {
@SuppressWarnings("unchecked")
- Class<AServicePing> cl = (Class<AServicePing>) Class.forName(ping_class);
+ Class<AServicePing> cl = (Class<AServicePing>) Class.forName(ping_class);
return (AServicePing) cl.newInstance();
} else {
- String[] cp_elems = classpath.split(":");
- URL[] cp_urls = new URL[cp_elems.length];
+ String[] cp_elems = classpath.split(":");
+ URL[] cp_urls = new URL[cp_elems.length];
- for ( int i = 0; i < cp_elems.length; i++ ) {
- cp_urls[i] = new URL("file://" + cp_elems[i]);
- }
- @SuppressWarnings("resource")
- URLClassLoader l = new URLClassLoader(cp_urls);
- @SuppressWarnings("rawtypes")
- Class loaded_class = l.loadClass(ping_class);
- l = null;
- return (AServicePing) loaded_class.newInstance();
- }
+ for ( int i = 0; i < cp_elems.length; i++ ) {
+ cp_urls[i] = new URL("file://" + cp_elems[i]);
+ }
+ @SuppressWarnings("resource")
+ URLClassLoader l = new URLClassLoader(cp_urls);
+ @SuppressWarnings("rawtypes")
+ Class loaded_class = l.loadClass(ping_class);
+ l = null;
+ return (AServicePing) loaded_class.newInstance();
}
+ }
void runAsThread()
{
@@ -382,13 +382,15 @@ class PingDriver
}
try {
- pinger.init(ping_arguments, endpoint);
props.setProperty("total-instances", "" + sset.countImplementors());
props.setProperty("active-instances", "" + sset.getActiveInstances());
props.setProperty("references", "" + sset.countReferences());
- props.setProperty("runfailures", "" + sset.getRunFailures());
+ props.setProperty("run-failures", "" + sset.getRunFailures());
+ props.setProperty("monitor-rate", "" + meta_ping_rate);
+ props.setProperty("service-id", "" + sset.getId().getFriendly());
pinger.setSmState(props);
+ pinger.init(ping_arguments, endpoint);
while ( ! shutdown ) {
Pong pr = new Pong();
@@ -480,7 +482,7 @@ class PingDriver
arglist.add(cp);
//arglist.add("-Xmx100M");
arglist.add("-Dcom.sun.management.jmxremote");
- arglist.add("org.apache.uima.ducc.smnew.ServicePingMain");
+ arglist.add("org.apache.uima.ducc.sm.ServicePingMain");
arglist.add("--class");
arglist.add(ping_class);
arglist.add("--endpoint");
@@ -645,10 +647,13 @@ class PingDriver
// Ask for the ping
try {
logger.info(methodName, sset.getId(), "ExtrnPingDriver: ping OUT");
- props.setProperty("total-instances", "" + sset.countImplementors());
+ props.setProperty("total-instances" , "" + sset.countImplementors());
props.setProperty("active-instances", "" + sset.getActiveInstances());
- props.setProperty("references", "" + sset.countReferences());
- props.setProperty("runfailures", "" + sset.getRunFailures());
+ props.setProperty("references" , "" + sset.countReferences());
+ props.setProperty("run-failures" , "" + sset.getRunFailures());
+ props.setProperty("monitor-rate" , "" + meta_ping_rate);
+ props.setProperty("service-id" , "" + sset.getId().getFriendly());
+
oos.writeObject(new Ping(false, props));
oos.flush();
oos.reset();
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java Wed Feb 5 21:30:27 2014
@@ -111,7 +111,7 @@ public class ServiceManagerComponent
private boolean testmode = false;
Map<String, String> administrators = new HashMap<String, String>();
- String version = "1.1.0";
+ String version = "1.1.0+";
public ServiceManagerComponent(CamelContext context)
{
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java Wed Feb 5 21:30:27 2014
@@ -208,9 +208,8 @@ public class ServicePingMain
} catch (IOException e2) {
e2.printStackTrace();
return 1;
- } finally {
- try {sock.close();} catch ( Throwable t) {}
- }
+ }
+
print ("ServicePingMain listens on port", sock.getLocalPort());
InputStream sock_in = null;
OutputStream sock_out = null;
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java Wed Feb 5 21:30:27 2014
@@ -170,13 +170,6 @@ public class ServiceSet
parseIndependentServices();
- if ( ! job_props.containsKey("service_ping_dolog")) {
- job_props.put("service_ping_dolog", "false");
- }
- if ( !job_props.containsKey("service_ping_timeout") ) {
- job_props.put("service_ping_timeout", ""+ServiceManagerComponent.meta_ping_timeout);
- }
-
meta_props.remove("references"); // Will get refreshed in upcoming OR state messages
meta_props.put("service-class", ""+service_class.decode());
meta_props.put("service-type", ""+service_type.decode());
@@ -514,8 +507,6 @@ public class ServiceSet
reference_start = false;
started = true;
init_failures = 0;
- run_failures = 0;
- excessiveRunFailures = false;
}
/**
@@ -976,6 +967,7 @@ public class ServiceSet
"Excessive initialization failures. Total failures[" + init_failures + "]",
"allowed [" + init_failures_max + "], not restarting.");
}
+ setAutostart(false);
} else {
logger.warn(methodName, id, "Instance", inst_id + ": Uunsolicited termination, not yet excessive. Restarting instance.");
start(1);