You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ch...@apache.org on 2014/02/05 22:30:27 UTC

svn commit: r1564940 - in /uima/sandbox/uima-ducc/trunk: src/main/resources/service_monitors/ uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/ uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/

Author: challngr
Date: Wed Feb  5 21:30:27 2014
New Revision: 1564940

URL: http://svn.apache.org/r1564940
Log:
UIMA-3405 Add failure monitoring to default UIMA-AS pinger.

Added:
    uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external
Modified:
    uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing
    uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java

Modified: uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing (original)
+++ uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing Wed Feb  5 21:30:27 2014
@@ -1,5 +1,5 @@
 service_ping_class = org.apache.uima.ducc.cli.UimaAsPing
-service_ping_arguments=meta_timeout=15000,broker_jmx_port=${ducc.broker.jmx.port},queue_threshold=0,window=3,enable_log=false
+service_ping_arguments=meta-timeout=15000,broker-jmx-port=${ducc.broker.jmx.port},max-failures=5,failure-window=10,enable-log=false
 #service_ping_jvmargs = whatever
 #service_ping_classpath= whatever stuff, in addition to the defaults supplied by the sm
 service_ping_dolog=False

Added: uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external?rev=1564940&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external (added)
+++ uima/sandbox/uima-ducc/trunk/src/main/resources/service_monitors/org.apache.uima.ducc.cli.UimaAsPing.external Wed Feb  5 21:30:27 2014
@@ -0,0 +1,11 @@
+service_ping_class = org.apache.uima.ducc.cli.UimaAsPing
+service_ping_arguments=meta-timeout=15000,broker-jmx-port=${ducc.broker.jmx.port},max-failures=5,failure-window=30,enable-log=true
+#service_ping_jvmargs = whatever
+service_ping_classpath=${DUCC_HOME}/lib/uima/*:${DUCC_HOME}/activemq/*:${DUCC_HOME}/lib//springframework/*
+service_ping_dolog=true
+service_ping_timeout = 10000
+
+internal = false
+
+
+

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/UimaAsPing.java Wed Feb  5 21:30:27 2014
@@ -30,6 +30,7 @@ import org.apache.uima.UIMAFramework;
 import org.apache.uima.aae.client.UimaASProcessStatus;
 import org.apache.uima.aae.client.UimaAsBaseCallbackListener;
 import org.apache.uima.aae.client.UimaAsynchronousEngine;
+import org.apache.uima.adapter.jms.client.BaseUIMAAsynchronousEngineCommon_impl;
 import org.apache.uima.adapter.jms.client.BaseUIMAAsynchronousEngine_impl;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.collection.EntityProcessStatus;
@@ -39,15 +40,24 @@ import org.apache.uima.ducc.common.utils
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.util.Level;
 
-// 'q_thresh=nn,window=mm,broker_jmx=1100,meta_timeout=10000'
 public class UimaAsPing
     extends AServicePing
 {
-    int window = 3;
-    int queue_threshold = 0;
-
     String ep;
 
+    int failure_max = 5;            // max consecutive run failures before reporting excessive failures
+                                    // which prevents restart of instances
+    int current_failures = 0;       // current consecutive run failures
+    int consecutive_failures = 0;   // n failures in consecutive pings
+    int failure_window_size = 15;   // 15 minutes
+    int monitor_rate = 1;           // ping rate, in minutes, min 1 used for calculations
+    int fail_index = 0;
+    int[] failure_window = null;    // tracks consecutive failures within a window
+    int failure_cursor = 0;
+    long service_id = 0;
+    
+    boolean excessive_failures = false;
+    
     String endpoint;
     String broker;
     int    meta_timeout;
@@ -64,7 +74,7 @@ public class UimaAsPing
     String pid;
     boolean gmfail = false;
     boolean enable_log = false;
-
+    
     public UimaAsPing()
     {
     }
@@ -94,16 +104,14 @@ public class UimaAsPing
         // not needed here fyi broker_port = url.getPort();
 
                 
-        //UIMAFramework.getLogger(BaseUIMAAsynchronousEngineCommon_impl.class).setLevel(Level.OFF);
-        //UIMAFramework.getLogger(BaseUIMAAsynchronousEngine_impl.class).setLevel(Level.OFF);
+        UIMAFramework.getLogger(BaseUIMAAsynchronousEngineCommon_impl.class).setLevel(Level.OFF);
+        UIMAFramework.getLogger(BaseUIMAAsynchronousEngine_impl.class).setLevel(Level.OFF);
         // there are a couple junky messages that slip by the above configurations.  turn the whole danged thing off.
         UIMAFramework.getLogger().setLevel(Level.INFO);
 
         if ( args == null ) {
             meta_timeout = 5000;
             broker_jmx_port = 1099;
-            queue_threshold = 0;
-            window = 3;
         } else {
             // 'q_thresh=nn,window=mm,broker_jmx_port=1100,meta_timeout=10000'
             // turn the argument string into properties
@@ -118,15 +126,16 @@ public class UimaAsPing
                 // TODO Auto-generated catch block
                 e.printStackTrace();
             }
-            meta_timeout = props.getIntProperty("meta_timeout", 5000);
-            broker_jmx_port = props.getIntProperty("broker_jmx_port", 1099);
-            queue_threshold = props.getIntProperty("queue_threshold", 0);
-            window = props.getIntProperty("window", 3);
-            enable_log = props.getBooleanProperty("enable_log", false);
-
+            meta_timeout         = props.getIntProperty    ("meta-timeout"   , 5000);
+            broker_jmx_port      = props.getIntProperty    ("broker-jmx-port", 1099);
+            enable_log           = props.getBooleanProperty("enable-log"     , false);
+            failure_max          = props.getIntProperty    ("max-failures"   , failure_max);
+            failure_window_size  = props.getIntProperty    ("failure-window" , failure_window_size);
+            failure_window = new int[failure_window_size];
+            failure_cursor = 0;
         }
-        queueSizeWindow = new int[window];
-        doLog("<ctr>", null, "INIT: meta_timeout", meta_timeout, "broker_jmx_port", broker_jmx_port, "queue_threshold", queue_threshold, "window", window);
+
+        doLog("<ctr>", null, "INIT: meta_timeout", meta_timeout, "broker-jmx-port", broker_jmx_port);
 
         this.monitor = new UimaAsServiceMonitor(endpoint, broker_host, broker_jmx_port);
     }
@@ -150,30 +159,65 @@ public class UimaAsPing
             }
         }
         System.out.println(buf);
+    }
 
+    private String fmtArray(int[] array)
+    {
+        Object[] vals = new Object[array.length];
+        StringBuffer sb = new StringBuffer();
+        
+        for ( int i = 0; i < array.length; i++ ) {
+            sb.append("%3s ");
+            vals[i] = Integer.toString(array[i]);
+        }
+        return String.format(sb.toString(), vals);
     }
 
-    void evaluateBrokerStatistics(IServiceStatistics stats)
+    void evaluateService(IServiceStatistics stats)
     {
     	String methodName = "evaluatePing";
         // Note that this particular pinger considers 'health' to be a function of whether
         // the get-mata worked AND the queue statistics.
         try {
             monitor.collect();
+            stats.setHealthy(true);       // this pinger defines 'healthy' as
+                                          // 'service responds to get-meta and broker returns jmx stats'
+
 
-            if ( queue_threshold > 0 ) {         // only do this if a threshold is set
-                // if the last 'n' q depths are > threshold, mark the service unhealthy
-                // primitive, but maybe an OK first guess
-                queueSizeWindow[queueCursor++ % window] = (int)monitor.getQueueSize();
-                int sum = 0;
-                for ( int i = 0; i < window; i++ ) {
-                    sum += queueSizeWindow[i];
+            monitor_rate = Integer.parseInt(smState.getProperty("monitor-rate") ) / 60000;       // convert to minutes
+            service_id   = Long.parseLong(smState.getProperty("service-id"));            
+            if (monitor_rate <= 0 ) monitor_rate = 1;                                            // minimum 1 minute allowed
+
+            // Calculate total instance failures within some configured window.  If we get a cluster
+            // of failures, signal excessive failures so SM stops spawning new ones.
+            int failures = Integer.parseInt(smState.getProperty("run-failures"));
+            doLog(methodName, "run-failures:", failures);
+            if ( (failure_window != null) && (failures > 0) ) {
+                int diff = failures - current_failures;  // nfailures since last update
+                current_failures = failures;
+
+                if ( diff > 0 ) {
+                    failure_window[failure_cursor++] = diff;
+                } else {
+                    failure_window[failure_cursor++] = 0;                    
                 }
-                sum = sum / window;
-                stats.setHealthy( sum < queue_threshold ? true : false );
-                doLog(methodName, null, "EVAL: Q depth", monitor.getQueueSize(), "window", sum, "health", stats.isHealthy());
-            } else {
-                stats.setHealthy(true);
+
+                doLog(methodName, "failures", failures, "current_failures", current_failures, 
+                      "failure_window", fmtArray(failure_window), "failure_cursor", failure_cursor);
+
+                failure_cursor = failure_cursor % failure_window_size;
+
+
+
+                int windowed_failures = 0;
+                excessive_failures = false;
+                for ( int i = 0; i < failure_window_size; i++ ) {
+                    windowed_failures += failure_window[i];                    
+                }
+                if ( windowed_failures >= failure_max ) {
+                    excessive_failures = true;
+                }
+                doLog(methodName, "windowed_failures", windowed_failures, "excessive_failures", excessive_failures);
             }
 
         } catch ( Throwable t ) {
@@ -182,6 +226,11 @@ public class UimaAsPing
         }
     }
 
+    public boolean isExcessiveFailures()
+    {
+        return excessive_failures;
+    }
+
     public IServiceStatistics getStatistics()
     {
         String methodName = "getStatistics";
@@ -189,7 +238,7 @@ public class UimaAsPing
         nodeIp = "N/A";
         pid = "N/A";
 
-        evaluateBrokerStatistics(statistics);       // if we get here, the get-meta worked well enough
+        evaluateService(statistics);       // if we get here, the get-meta worked well enough
 
         // Instantiate Uima AS Client
         BaseUIMAAsynchronousEngine_impl uimaAsEngine = new BaseUIMAAsynchronousEngine_impl();

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java Wed Feb  5 21:30:27 2014
@@ -215,7 +215,7 @@ class PingDriver
     protected boolean resolveBooleanProperty(String prop, DuccProperties ping_props, DuccProperties job_props, boolean deflt)
     {
         String val = resolveStringProperty(prop, ping_props, job_props, Boolean.toString(deflt));
-        return ( val.equalsIgnoreCase("t") ||             // must be t T true TRUE - all else is false
+        return ( val.equalsIgnoreCase("True") ||
                  val.equalsIgnoreCase("true") );
     }
 
@@ -339,23 +339,23 @@ class PingDriver
     {
         if ( classpath == null ) {
             @SuppressWarnings("unchecked")
-			Class<AServicePing> cl = (Class<AServicePing>) Class.forName(ping_class);
+                Class<AServicePing> cl = (Class<AServicePing>) Class.forName(ping_class);
             return (AServicePing) cl.newInstance();
         } else {
-             String[] cp_elems = classpath.split(":");
-             URL[]    cp_urls = new URL[cp_elems.length];
+            String[] cp_elems = classpath.split(":");
+            URL[]    cp_urls = new URL[cp_elems.length];
             
-             for ( int i = 0; i < cp_elems.length; i++ ) {
-                 cp_urls[i] = new URL("file://" + cp_elems[i]);                
-             }
-             @SuppressWarnings("resource")
-			URLClassLoader l = new URLClassLoader(cp_urls);
-             @SuppressWarnings("rawtypes")
-			Class loaded_class = l.loadClass(ping_class);
-             l = null;
-             return (AServicePing) loaded_class.newInstance();
-        }
+            for ( int i = 0; i < cp_elems.length; i++ ) {
+                cp_urls[i] = new URL("file://" + cp_elems[i]);                
+            }
+            @SuppressWarnings("resource")
+                URLClassLoader l = new URLClassLoader(cp_urls);
+            @SuppressWarnings("rawtypes")
+                Class loaded_class = l.loadClass(ping_class);
+            l = null;
+            return (AServicePing) loaded_class.newInstance();
         }
+    }
 
     void runAsThread()
     {
@@ -382,13 +382,15 @@ class PingDriver
 		}
 
         try {
-            pinger.init(ping_arguments, endpoint);
             props.setProperty("total-instances", "" + sset.countImplementors());
             props.setProperty("active-instances", "" + sset.getActiveInstances());
             props.setProperty("references", "" + sset.countReferences());
-            props.setProperty("runfailures", "" + sset.getRunFailures());
+            props.setProperty("run-failures", "" + sset.getRunFailures());
+            props.setProperty("monitor-rate", "" + meta_ping_rate);
+            props.setProperty("service-id", "" + sset.getId().getFriendly());
 
             pinger.setSmState(props);
+            pinger.init(ping_arguments, endpoint);
             while ( ! shutdown ) {
                 
                 Pong pr = new Pong();
@@ -480,7 +482,7 @@ class PingDriver
         arglist.add(cp);
         //arglist.add("-Xmx100M");
         arglist.add("-Dcom.sun.management.jmxremote");
-        arglist.add("org.apache.uima.ducc.smnew.ServicePingMain");
+        arglist.add("org.apache.uima.ducc.sm.ServicePingMain");
         arglist.add("--class");
         arglist.add(ping_class);
         arglist.add("--endpoint");
@@ -645,10 +647,13 @@ class PingDriver
                     // Ask for the ping
                     try {
                         logger.info(methodName, sset.getId(), "ExtrnPingDriver: ping OUT");
-                        props.setProperty("total-instances", "" + sset.countImplementors());
+                        props.setProperty("total-instances" , "" + sset.countImplementors());
                         props.setProperty("active-instances", "" + sset.getActiveInstances());
-                        props.setProperty("references", "" + sset.countReferences());
-                        props.setProperty("runfailures", "" + sset.getRunFailures());
+                        props.setProperty("references"      , "" + sset.countReferences());
+                        props.setProperty("run-failures"    , "" + sset.getRunFailures());
+                        props.setProperty("monitor-rate"    , "" + meta_ping_rate);
+                        props.setProperty("service-id"      , "" + sset.getId().getFriendly());
+
                         oos.writeObject(new Ping(false, props));
                         oos.flush();
                         oos.reset();

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java Wed Feb  5 21:30:27 2014
@@ -111,7 +111,7 @@ public class ServiceManagerComponent 
     private boolean testmode = false;
 
     Map<String, String> administrators = new HashMap<String, String>();
-    String version = "1.1.0";
+    String version = "1.1.0+";
 
 	public ServiceManagerComponent(CamelContext context) 
     {

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java Wed Feb  5 21:30:27 2014
@@ -208,9 +208,8 @@ public class ServicePingMain
 		} catch (IOException e2) {
 			e2.printStackTrace();
 			return 1;
-		} finally {
-			try {sock.close();} catch ( Throwable t) {}
-		}
+		} 
+
         print ("ServicePingMain listens on port", sock.getLocalPort());
         InputStream sock_in = null;
 		OutputStream sock_out = null;

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java?rev=1564940&r1=1564939&r2=1564940&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java Wed Feb  5 21:30:27 2014
@@ -170,13 +170,6 @@ public class ServiceSet
 
         parseIndependentServices();
 
-        if ( ! job_props.containsKey("service_ping_dolog")) {
-            job_props.put("service_ping_dolog", "false");
-        }        
-        if ( !job_props.containsKey("service_ping_timeout") ) {
-            job_props.put("service_ping_timeout", ""+ServiceManagerComponent.meta_ping_timeout);
-        }
-
         meta_props.remove("references");          // Will get refreshred in upcoming OR state messages
         meta_props.put("service-class", ""+service_class.decode());
         meta_props.put("service-type", ""+service_type.decode());
@@ -514,8 +507,6 @@ public class ServiceSet
         reference_start = false;
         started = true;
         init_failures = 0;
-        run_failures = 0;
-        excessiveRunFailures = false;
     }
 
     /**
@@ -976,6 +967,7 @@ public class ServiceSet
                                         "Excessive initialization failures. Total failures[" + init_failures + "]",
                                         "allowed [" + init_failures_max + "], not restarting.");
                         }
+                        setAutostart(false);
                     } else {
                         logger.warn(methodName, id, "Instance", inst_id + ": Uunsolicited termination, not yet excessive.  Restarting instance.");
                         start(1);