You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ch...@apache.org on 2014/02/07 14:48:18 UTC
svn commit: r1565655 - in /uima/sandbox/uima-ducc/trunk:
uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/
uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/
Author: challngr
Date: Fri Feb 7 13:48:17 2014
New Revision: 1565655
URL: http://svn.apache.org/r1565655
Log:
UIMA-3405 Implement service instance failure/restart policy.
Modified:
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java
uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/AServicePing.java Fri Feb 7 13:48:17 2014
@@ -28,9 +28,21 @@ import org.apache.uima.ducc.common.IServ
public abstract class AServicePing
{
+ protected int[] failure_window = null; // failure counts recorded within the current window (not necessarily consecutive)
+ protected int failure_cursor = 0; // cursor to track failures within the current window
+ protected int total_failures = 0; // current total run failures. usually monotonically increasing.
+ protected int failure_max = 3; // max allowed failures within any given window
+
+ protected int failure_window_size = 30; // 30 minutes. overridden at first ping
+ protected int monitor_rate = 1; // ping rate, in minutes, min 1 used for calculations
+
+ protected boolean log_enabled = false;
+ protected long service_id = 0;
protected Properties smState;
+ private org.apache.uima.ducc.common.utils.DuccLogger duccLogger = null;
+
/**
* Called by the ping driver, to pass in useful things the pinger may want.
* @param arguments This is passed in from the service specification's
@@ -42,6 +54,43 @@ public abstract class AServicePing
public abstract void init(String arguments, String endpoint) throws Exception;
/**
+ * Called by the ping driver to initialize static information about the service and
+ * pinger. This guy calls the public init() method and is not intended for public
+ * consumption.
+ *
+ * This guy initializes the default failure monitor, service id, whether the
+ * service log is enabled, and the monitor (ping) rate, all of which are
+ * free to be used by derived classes.
+ *
+ * @param arguments This is passed in from the service specification's
+ * service_ping_arguments string.
+ *
+ * @param endpoint This is the name of the service endpoint, as passed in
+ * at service registration.
+ *
+ * @param initProps Properties file with static data about the service and
+ * pinger.
+ */
+ public void init(String arguments, String endpoint, Properties initProps)
+ throws Exception
+ {
+ failure_window_size = Integer.parseInt(initProps.getProperty("failure-window"));
+ failure_window = new int[failure_window_size];
+ failure_cursor = 0;
+
+ monitor_rate = Integer.parseInt(initProps.getProperty("monitor-rate") ) / 60000; // convert to minutes
+ if (monitor_rate <= 0 ) monitor_rate = 1; // minimum 1 minute allowed
+
+ service_id = Long.parseLong(initProps.getProperty("service-id") );
+
+ log_enabled = Boolean.parseBoolean(initProps.getProperty("do-log"));
+
+ failure_max = Integer.parseInt(initProps.getProperty("failure-max"));
+
+ init(arguments, endpoint);
+ }
+
+ /**
* Stop is called by the ping wrapper when it is being killed. Implementors may optionally
* override this method with conenction shutdown code.
*/
@@ -53,31 +102,135 @@ public abstract class AServicePing
* for correct management and display of the service.
*/
public abstract IServiceStatistics getStatistics();
-
+ /**
+ * Current state of the monitored service is passed in here.
+ */
public void setSmState(Properties props)
{
smState = props;
}
+ /**
+ * Getter for the service state; implementors may just access it directly if they want.
+ */
public Properties getSmState()
{
return smState;
}
+ /**
+ * Called by the service manager to query the number of additional needed service instances.
+ */
public int getAdditions()
{
return 0;
}
+ /**
+ * Called by the service manager to query the number of service instances to dump.
+ */
public int getDeletions()
{
return 0;
}
+ private String fmtArray(int[] array)
+ {
+ Object[] vals = new Object[array.length];
+ StringBuffer sb = new StringBuffer();
+
+ for ( int i = 0; i < array.length; i++ ) {
+ sb.append("%3s ");
+ vals[i] = Integer.toString(array[i]);
+ }
+ return String.format(sb.toString(), vals);
+ }
+
+ /**
+ * This is intended for use by the SM when it drives a pinger in an internal thread. External
+ * pingers won't have this set.
+ *
+ * However, an external pinger's stdout is picked up by DUCC, so log messages will still be
+ * printed to the service log without the use of the ducc logger.
+ */
+ public void setLogger(org.apache.uima.ducc.common.utils.DuccLogger logger)
+ {
+ this.duccLogger = logger;
+ }
+
+ private void doLog(String methodName, Object ... msg)
+ {
+ if ( !log_enabled ) return;
+
+ StringBuffer buf = new StringBuffer(methodName);
+ buf.append(" ");
+ buf.append(Long.toString(service_id));
+ for ( Object o : msg ) {
+ buf.append(" ");
+ if ( o == null ) {
+ buf.append("<null>");
+ } else {
+ buf.append(o.toString());
+ }
+ }
+
+ if ( duccLogger != null ) {
+ duccLogger.info(methodName, null, buf);
+ } else {
+ System.out.println(buf);
+ }
+ }
+
+ /**
+ * This determines if there have been excessive service instance failures by tracking the
+ * number of failures, not consecutive, but rather within a window of time. It may be
+ * overridden by extending monitors.
+ */
public boolean isExcessiveFailures()
{
- return false;
+ String methodName = "isExcessiveFailures";
+ boolean excessive_failures = false;
+
+ // Calculate total instance failures within some configured window. If we get a cluster
+ // of failures, signal excessive failures so SM stops spawning new ones.
+ int failures = Integer.parseInt(smState.getProperty("run-failures"));
+ doLog(methodName, "failures:", failures, "total_failures", total_failures);
+ if ( failures > 0 ) {
+ int diff = Math.max(0, failures - total_failures); // nfailures since last update
+ if ( diff < 0 ) {
+ // This indicates an instance was restarted, which forces a cleaning of
+ // failure conditions.
+ total_failures = 0;
+ failure_cursor = 0;
+ for ( int i = 0; i < failure_window_size; i++ ) {
+ failure_window[i] = 0;
+ }
+ } else if ( diff > 0 ) {
+ total_failures += diff;
+ }
+
+ if ( diff >= 0 ) {
+ failure_window[failure_cursor++] = diff;
+ }
+
+ doLog(methodName, "failures", failures, "total_failures", total_failures,
+ "failure_window", fmtArray(failure_window), "failure_cursor", failure_cursor);
+
+ failure_cursor = failure_cursor % failure_window_size;
+
+ int windowed_failures = 0;
+ excessive_failures = false;
+ for ( int i = 0; i < failure_window_size; i++ ) {
+ windowed_failures += failure_window[i];
+ }
+ if ( windowed_failures >= failure_max ) {
+ excessive_failures = true;
+ }
+ doLog(methodName, "windowed_failures", windowed_failures, "excessive_failures", excessive_failures);
+ }
+ return excessive_failures;
}
+
}
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java Fri Feb 7 13:48:17 2014
@@ -45,18 +45,6 @@ public class UimaAsPing
{
String ep;
- int failure_max = 5; // max consecutive run failures before reporting excessive failures
- // which prevents restart of instances
- int current_failures = 0; // current consecutive run failures
- int consecutive_failures = 0; // n failures in consecutive pings
- int failure_window_size = 15; // 15 minutes
- int monitor_rate = 1; // ping rate, in minutes, min 1 used for calculations
- int fail_index = 0;
- int[] failure_window = null; // tracks consecutive failures within a window
- int failure_cursor = 0;
- long service_id = 0;
-
- boolean excessive_failures = false;
String endpoint;
String broker;
@@ -73,7 +61,6 @@ public class UimaAsPing
String nodeIp;
String pid;
boolean gmfail = false;
- boolean enable_log = false;
public UimaAsPing()
{
@@ -128,15 +115,8 @@ public class UimaAsPing
}
meta_timeout = props.getIntProperty ("meta-timeout" , 5000);
broker_jmx_port = props.getIntProperty ("broker-jmx-port", 1099);
- enable_log = props.getBooleanProperty("enable-log" , false);
- failure_max = props.getIntProperty ("max-failures" , failure_max);
- failure_window_size = props.getIntProperty ("failure-window" , failure_window_size);
- failure_window = new int[failure_window_size];
- failure_cursor = 0;
}
- doLog("<ctr>", null, "INIT: meta_timeout", meta_timeout, "broker-jmx-port", broker_jmx_port);
-
this.monitor = new UimaAsServiceMonitor(endpoint, broker_host, broker_jmx_port);
}
@@ -146,8 +126,8 @@ public class UimaAsPing
}
private void doLog(String methodName, Object ... msg)
- {
- if ( ! enable_log ) return;
+ {
+ if ( !log_enabled ) return;
StringBuffer buf = new StringBuffer(methodName);
for ( Object o : msg ) {
@@ -161,17 +141,6 @@ public class UimaAsPing
System.out.println(buf);
}
- private String fmtArray(int[] array)
- {
- Object[] vals = new Object[array.length];
- StringBuffer sb = new StringBuffer();
-
- for ( int i = 0; i < array.length; i++ ) {
- sb.append("%3s ");
- vals[i] = Integer.toString(array[i]);
- }
- return String.format(sb.toString(), vals);
- }
void evaluateService(IServiceStatistics stats)
{
@@ -182,55 +151,12 @@ public class UimaAsPing
monitor.collect();
stats.setHealthy(true); // this pinger defines 'healthy' as
// 'service responds to get-meta and broker returns jmx stats'
-
-
- monitor_rate = Integer.parseInt(smState.getProperty("monitor-rate") ) / 60000; // convert to minutes
- service_id = Long.parseLong(smState.getProperty("service-id"));
- if (monitor_rate <= 0 ) monitor_rate = 1; // minimum 1 minute allowed
-
- // Calculate total instance failures within some configured window. If we get a cluster
- // of failures, signal excessive failures so SM stops spawning new ones.
- int failures = Integer.parseInt(smState.getProperty("run-failures"));
- doLog(methodName, "run-failures:", failures);
- if ( (failure_window != null) && (failures > 0) ) {
- int diff = failures - current_failures; // nfailures since last update
- current_failures = failures;
-
- if ( diff > 0 ) {
- failure_window[failure_cursor++] = diff;
- } else {
- failure_window[failure_cursor++] = 0;
- }
-
- doLog(methodName, "failures", failures, "current_failures", current_failures,
- "failure_window", fmtArray(failure_window), "failure_cursor", failure_cursor);
-
- failure_cursor = failure_cursor % failure_window_size;
-
-
-
- int windowed_failures = 0;
- excessive_failures = false;
- for ( int i = 0; i < failure_window_size; i++ ) {
- windowed_failures += failure_window[i];
- }
- if ( windowed_failures >= failure_max ) {
- excessive_failures = true;
- }
- doLog(methodName, "windowed_failures", windowed_failures, "excessive_failures", excessive_failures);
- }
-
} catch ( Throwable t ) {
stats.setHealthy(false);
monitor.setJmxFailure(t.getMessage());
}
}
- public boolean isExcessiveFailures()
- {
- return excessive_failures;
- }
-
public IServiceStatistics getStatistics()
{
String methodName = "getStatistics";
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java Fri Feb 7 13:48:17 2014
@@ -145,6 +145,7 @@ class PingDriver
this.log_directory = resolveStringProperty ("log_directory", ping_props, job_props, null); // cli always puts this int job props, no default
+
jvm_args_str = jvm_args_str.trim();
if ( jvm_args_str.equals("") ) {
jvm_args = null;
@@ -357,12 +358,30 @@ class PingDriver
}
}
+ void setCommonInitProperties(Properties props)
+ {
+ props.setProperty("monitor-rate" , "" + meta_ping_rate);
+ props.setProperty("service-id" , "" + sset.getId().getFriendly());
+ props.setProperty("failure-max" , "" + ServiceManagerComponent.failure_max);
+ props.setProperty("failure-window" , "" + ServiceManagerComponent.failure_window);
+ props.setProperty("do-log" , "" + do_log);
+ }
+
+ void setCommonProperties(Properties props)
+ {
+ props.setProperty("total-instances" , "" + sset.countImplementors());
+ props.setProperty("active-instances", "" + sset.getActiveInstances());
+ props.setProperty("references" , "" + sset.countReferences());
+ props.setProperty("run-failures" , "" + sset.getRunFailures());
+ }
+
void runAsThread()
{
long tid = Thread.currentThread().getId();
String methodName = "runAsThread[" + tid + "]";
AServicePing pinger = null;
+ Properties initProps = new Properties();
Properties props = new Properties();
try {
@@ -381,18 +400,14 @@ class PingDriver
return;
}
- try {
- props.setProperty("total-instances", "" + sset.countImplementors());
- props.setProperty("active-instances", "" + sset.getActiveInstances());
- props.setProperty("references", "" + sset.countReferences());
- props.setProperty("run-failures", "" + sset.getRunFailures());
- props.setProperty("monitor-rate", "" + meta_ping_rate);
- props.setProperty("service-id", "" + sset.getId().getFriendly());
-
- pinger.setSmState(props);
- pinger.init(ping_arguments, endpoint);
- while ( ! shutdown ) {
-
+ try {
+ setCommonInitProperties(initProps);
+ pinger.setLogger(logger);
+ pinger.init(ping_arguments, endpoint, initProps);
+
+ while ( ! shutdown ) {
+ setCommonProperties(props);
+ pinger.setSmState(props);
Pong pr = new Pong();
pr.setStatistics(pinger.getStatistics());
@@ -647,13 +662,7 @@ class PingDriver
// Ask for the ping
try {
logger.info(methodName, sset.getId(), "ExtrnPingDriver: ping OUT");
- props.setProperty("total-instances" , "" + sset.countImplementors());
- props.setProperty("active-instances", "" + sset.getActiveInstances());
- props.setProperty("references" , "" + sset.countReferences());
- props.setProperty("run-failures" , "" + sset.getRunFailures());
- props.setProperty("monitor-rate" , "" + meta_ping_rate);
- props.setProperty("service-id" , "" + sset.getId().getFriendly());
-
+ setCommonProperties(props);
oos.writeObject(new Ping(false, props));
oos.flush();
oos.reset();
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java Fri Feb 7 13:48:17 2014
@@ -688,8 +688,9 @@ public class ServiceHandler
wanted = instances;
}
+ sset.setNInstances(running + instances);
+ sset.resetRuntimeErrors();
if ( update ) {
- sset.setNInstances(running + instances);
sset.saveMetaProperties();
}
@@ -743,8 +744,8 @@ public class ServiceHandler
tolose = Math.min(instances, running);
}
+ sset.setNInstances(Math.max(0, running - instances)); // never persist < 0 registered instance
if ( update ) {
- sset.setNInstances(Math.max(0, running - instances)); // never persist < 0 registered instance
sset.saveMetaProperties();
}
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java Fri Feb 7 13:48:17 2014
@@ -98,6 +98,7 @@ public class ServiceManagerComponent
static String default_ping_class;
static int failure_max = 5;
+ static int failure_window = 30;
private String state_dir = null;
private String state_file = null;
@@ -253,6 +254,7 @@ public class ServiceManagerComponent
DuccDaemonRuntimeProperties.getInstance().boot(DaemonName.ServiceManager,getProcessJmxUrl());
failure_max = SystemPropertyResolver.getIntProperty("ducc.sm.instance.failure.max", failure_max);
+ failure_window = SystemPropertyResolver.getIntProperty("ducc.sm.instance.failure.window", failure_window);
meta_ping_rate = SystemPropertyResolver.getIntProperty("ducc.sm.meta.ping.rate", meta_ping_rate);
meta_ping_timeout = SystemPropertyResolver.getIntProperty("ducc.sm.meta.ping.timeout", meta_ping_timeout);
meta_ping_stability = SystemPropertyResolver.getIntProperty("ducc.sm.meta.ping.stability", meta_ping_stability);
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java?rev=1565655&r1=1565654&r2=1565655&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java Fri Feb 7 13:48:17 2014
@@ -470,6 +470,12 @@ public class ServiceSet
return ping_only;
}
+ synchronized void resetRuntimeErrors()
+ {
+ run_failures = 0;
+ excessiveRunFailures = false;
+ }
+
synchronized void setAutostart(boolean auto)
{
cancelLinger();