You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2016/12/05 19:33:13 UTC

svn commit: r1772757 - in /uima/uima-ducc/trunk/src/main: admin/cron/ducc_watcher.crontab.example admin/tools/ducc_watcher scripts/tools/ducc_watcher

Author: degenaro
Date: Mon Dec  5 19:33:12 2016
New Revision: 1772757

URL: http://svn.apache.org/viewvc?rev=1772757&view=rev
Log:
UIMA-5053 DUCC ducc_watcher optional admin script to determine status and send notifications

- move from admin to bin

Added:
    uima/uima-ducc/trunk/src/main/scripts/tools/ducc_watcher   (with props)
Removed:
    uima/uima-ducc/trunk/src/main/admin/cron/ducc_watcher.crontab.example
    uima/uima-ducc/trunk/src/main/admin/tools/ducc_watcher

Added: uima/uima-ducc/trunk/src/main/scripts/tools/ducc_watcher
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/scripts/tools/ducc_watcher?rev=1772757&view=auto
==============================================================================
--- uima/uima-ducc/trunk/src/main/scripts/tools/ducc_watcher (added)
+++ uima/uima-ducc/trunk/src/main/scripts/tools/ducc_watcher Mon Dec  5 19:33:12 2016
@@ -0,0 +1,591 @@
+#! /usr/bin/env python
+# -----------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# -----------------------------------------------------------------------
+
+# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# +
+# + ducc_watcher
+# +
+# + purpose: send e-mail when a DUCC daemon state changes to not up
+# + 
+# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+import ast
+import datetime
+import getpass
+import json
+import logging
+import logging.handlers
+import os
+import smtplib
+import socket
+import string
+import sys
+import time
+import urllib
+
+from HTMLParser import HTMLParser
+
+from optparse import HelpFormatter
+from optparse import OptionGroup
+from optparse import OptionParser
+
+# ----------------------------------------------
+
+# Extend OptionParser class
+class ExtendedOptionParser(OptionParser):
+    # override epilog formatter so 
+    # that newlines are not deleted!
+    def format_epilog(self, formatter):
+        return self.epilog
+
+# ----------------------------------------------
+
+# parser for the system.daemons WS page
+class DuccHtmlParser(HTMLParser):
+    
+    tr_state = False
+    daemon_state = None
+    daemon_name = None  
+    daemon_date = None
+    daemon_ip = None
+    daemon_host = None
+    daemons = {}
+    
+    def get_daemons(self):
+        return self.daemons
+    
+    def handle_starttag(self, tag, attrs):
+        if(tag == 'tr' ):
+            self.tr_state = True
+        
+    def handle_endtag(self, tag):   
+        if(tag == 'tr'):
+            self.tr_state = False
+            self.daemon_state = None
+            self.daemon_name = None
+            self.daemon_date = None
+            self.daemon_ip = None
+            self.daemon_host = None
+        
+    def handle_data(self, data):
+        if(self.tr_state):
+            if(self.daemon_state == None):
+                self.daemon_state = data
+            elif(self.daemon_name == None):
+                self.daemon_name = data
+                if(self.daemon_name == 'Agent'):
+                    pass
+                else:
+                    self.daemons[self.daemon_name] = self.daemon_state
+            elif(self.daemon_date == None):
+                self.daemon_date = data
+            elif(self.daemon_ip == None):
+                self.daemon_ip = data
+            elif(self.daemon_host == None):
+                self.daemon_host = data
+                self.daemon_name = data
+                self.daemons[self.daemon_name] = self.daemon_state
+
+# ----------------------------------------------
+
+name = 'ducc_watcher'
+
+webserver = 'Webserver'
+head_daemons = [ 'Orchestrator',  'ResourceManager', 'Database', 'Broker', 'ProcessManager', 'ServiceManager', webserver ]
+
+flag_info = True
+flag_trace = False
+logger = None
+
+port = '42133'
+
+path = None
+log_file = None
+state_file = None
+
+flag_agents = False
+
+mail_host = 'localhost'
+email_list = None
+
+# produce a time stamp
+def get_timestamp():
+    tod = time.time()
+    timestamp = datetime.datetime.fromtimestamp(tod).strftime('%Y-%m-%d %H:%M:%S')         
+    return timestamp
+
+# get the host running this script
+def get_host():
+    host = socket.gethostname()
+    return host
+
+# get the user running this script
+def get_user():
+    user = getpass.getuser()
+    return user
+
+# make directories, if not already existing
+def mkdirs(path):
+    debug('mkdirs: path='+path)
+    if(os.path.exists(path)):
+        return
+    try:
+        os.makedirs(path)
+    except Exception,e:
+        exception(e)
+    
+# info message to log
+def info(text):
+    global logger
+    type = 'I'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    logger.info(line)
+    return line
+
+# trace message to log
+def trace(text):
+    global logger
+    global flag_trace
+    type = 'T'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    if(flag_trace):
+        logger.debug(line)
+    return line
+
+# debug message to log
+def debug(text):
+    global logger
+    type = 'D'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    logger.debug(line)
+    return line
+
+# error message to log
+def error(text):
+    global logger
+    type = 'E'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    logger.error(line)
+    return line
+
+# warn message to log
+def warn(text):
+    global logger
+    type = 'W'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    logger.warn(line)
+    return line
+
+# exit
+def exit(code):
+    text = 'exit code='+str(code)
+    email(text)
+    error(text)
+    sys.exit(code)
+
+# exception
+def exception(e):
+    line = error(str(e))
+    return line
+    
+# epilog for --help
+def get_epilog():
+    epilog = ''
+    return epilog
+
+# debug is normally not set
+def validate_debug(options):
+    global logger
+    if(options.flag_debug):
+        logger.setLevel(logging.DEBUG)
+    else:
+        logger.setLevel(logging.INFO)
+
+# consider head node daemons only
+# unless --agents is specified
+def validate_agents(options):
+    global flag_agents
+    if(options.flag_agents):
+        flag_agents = True
+    
+# use /tmp/<userid> as log+state directory 
+# unless --path is specified
+def validate_path(options):
+    if(options.path == None):
+        options.path = '/tmp'+'/'+get_user()
+    mkdirs(options.path)
+
+# setup rotating log file handler with
+# 8 versions of 8M bytes with base name
+# ducc_watcher.<target>.log
+def setup_log_file(options):  
+    global name
+    global target
+    global logger
+    log_file = options.path
+    if(not log_file.endswith('/')):
+        log_file = log_file + '/'
+    log_file = log_file + name + '.' + target +'.log'
+    handler = logging.handlers.RotatingFileHandler(
+        log_file, maxBytes=8*1024*1024, backupCount=8)
+    logger.addHandler(handler)
+    debug('log_file: '+log_file)
+
+# ducc_watcher.<target>.state
+def setup_state_file(options): 
+    global name 
+    global target
+    global state_file
+    state_file = options.path
+    if(not state_file.endswith('/')):
+        state_file = state_file + '/'
+    state_file = state_file + name + '.' + target +'.state'
+    debug('state_file: '+state_file)
+
+# must specify --target host:port of WS for fetching
+# of daemons status
+def validate_target(options):
+    global port
+    global target
+    global ducc_url_base
+    global ducc_url_servlet
+    protocol = 'http://'
+    servlet = '/ducc-servlet/classic-system-daemons-data'
+    if(options.target == None):
+        error('required "target" not specified')
+        exit(1)
+    target = options.target
+    if(':' not in target):
+        target = target+':'+str(port)
+    if(target.startswith(protocol)):
+        target = target.replace(protocol,'',1)
+    ducc_url_base = protocol+target
+    ducc_url_servlet = protocol+target+servlet
+    debug('target: '+ducc_url_base)
+    
+# list of e-mail recipients, if any
+def validate_email_list(options):
+    global email_list
+    if(not options.email_list == None):
+        email_list = options.email_list.split()
+    debug('email-list: '+str(email_list))
+
+# parse command line
+def parse_cmdline():
+    global name
+    parser = ExtendedOptionParser(epilog=get_epilog())
+    width = 45
+    parser.formatter.help_position = width
+    parser.formatter.max_help_position = width
+    parser.add_option('-a','--agents', action='store_true', dest='flag_agents', default=False, 
+                               help='include agents')
+    parser.add_option('-d','--debug', action='store_true', dest='flag_debug', default=False, 
+                               help='display debugging messages')
+    parser.add_option('-e','--email-list', action='store', dest='email_list', default=None, 
+                               help='blank separated list of email addresses to receive down + error notifications')
+    parser.add_option('-p','--path', action='store', dest='path', default=None,
+                               help='path to directory where log and state information are written, default is /tmp'+'/'+get_user())
+    parser.add_option('-t','--target', action='store', dest='target', default=None,
+                               help='[REQUIRED] <host> with default port of '+port+' or <host>:<port>')
+
+    (options, args) = parser.parse_args()
+    # -d
+    validate_debug(options)
+    # -t
+    validate_target(options)
+    # -e
+    validate_email_list(options)
+    # -p
+    validate_path(options)
+    # dependencies
+    setup_log_file(options)
+    setup_state_file(options)
+    # -a
+    validate_agents(options)
+
+# determine if named daemon is one of the head node ones
+def is_head(key):
+    global head_daemons
+    retVal = False
+    if(key in head_daemons):
+        retVal = True   
+    return retVal
+
+# get rid of noise. remove if
+# 1. state is unknown
+# 2. if is agent and agents are not wanted
+def filter(state_dict):
+    global flag_agents
+    retVal = {}
+    for key in state_dict:
+        if(state_dict[key] == 'unknown'):
+            pass
+        else:
+            if(is_head(key)):
+                retVal[key] = state_dict[key]
+            elif(flag_agents):
+                retVal[key] = state_dict[key]
+    return retVal
+
+# summarize state of all ducc daemons
+def summarize(state_dict):
+    global head_daemons
+    retVal = 'up'
+    if(len(state_dict) < len(head_daemons)):
+        retVal = 'down'
+    else:
+        for key in state_dict:
+            if(not state_dict[key] == 'up'):
+                retVal = 'down'
+                break;
+    return retVal
+
+# read previous daemons state
+def read_state_previous():
+    global state_dict_previous
+    global state_file
+    state_dict_previous = {}
+    try:
+        with open(state_file, 'r') as f:
+            s = f.read()
+            state_dict_previous = ast.literal_eval(s)
+            debug('state_previous(read): '+str(state_dict_previous))
+            state_dict_previous = filter(state_dict_previous)
+            debug('state_previous(filter): '+str(state_dict_previous))
+    except Exception,e:
+        error('unable to read state from '+state_file)
+        exception(e)
+
+# write previous daemons state
+def write_state_previous():
+    global state_dict_previous
+    global state_file
+    try:
+        with open(state_file, 'w') as f:
+            f.seek(0)
+            f.write(str(state_dict_previous)+'\n')
+            f.truncate()
+            debug('state_previous(write): '+str(state_dict_previous))
+    except Exception,e:
+        error('unable to write state to '+state_file)
+        exception(e)
+
+# current becomes previous daemons state
+def update_state_previous():
+    global state_dict_previous
+    global state_dict_current
+    state_dict_previous = state_dict_current
+    write_state_previous()
+
+# fetch current daemons state
+def fetch_state_current():
+    global flag_agents
+    global state_dict_current
+    global ducc_url_servlet
+    global webserver
+    state_dict_current = {}
+    try:
+        import urllib2
+        opener = urllib2.build_opener()
+        if(flag_agents):
+            opener.addheaders.append(('Cookie', 'DUCCagents=show'))
+        response = opener.open(ducc_url_servlet)
+        data = response.read()
+        parser = DuccHtmlParser()
+        parser.feed(data)
+        daemons = parser.get_daemons()
+        if(daemons == None):
+            debug('daemons is None')
+        else:
+            for daemon in daemons:
+                status = daemons[daemon]
+                trace(daemon+':'+' '+status+' ')
+                state_dict_current[daemon] = status
+            debug('state_current(read): '+str(state_dict_current))
+            state_dict_current = filter(state_dict_current)
+            debug('state_current(filter): '+str(state_dict_current))
+    except Exception,e:
+        # for WS status to down whenever contact fails
+        daemon = webserver
+        status = 'unreachable'
+        state_dict_current[daemon] = status
+        error('unable to fetch data from '+ducc_url_servlet)
+        exception(e)
+    debug('state_current: '+str(state_dict_current))
+
+# determine state changes between previous and current
+def determine_state_changes():
+    global state_dict_current
+    global state_dict_previous
+    global state_dict_not_up
+    global state_changes_count
+    state_changes_count = 0
+    state_dict_not_up = {}
+    for key in state_dict_current:
+        state_current = state_dict_current.get(key, '?')
+        if(state_current == 'up'):
+            pass
+        else:
+            state_dict_not_up[key] = state_current
+            state_previous = state_dict_previous.get(key, '?')
+            if(state_current == state_previous):
+                pass
+            else:
+                state_changes_count = state_changes_count + 1
+                info(key+' '+'from'+' '+state_previous+' '+'to'+' '+state_current)
+                
+# send email
+def email(HOST, SUBJECT, TO, FROM, TEXT):
+    try:
+        BODY = string.join((
+            "From: %s" % FROM,
+            "To: %s" % TO,
+            "Subject: %s" % SUBJECT ,
+            "",
+            TEXT
+            ), "\r\n")
+        server = smtplib.SMTP(HOST)
+        server.sendmail(FROM, [TO], BODY)
+        server.quit()
+        info('sent: ['+TO+'] '+TEXT)
+    except Exception,e:
+        error('not sent: ['+TO+'] '+TEXT)
+        exception(e)
+
+# send email to each recipient in the list
+def email_to_list(HOST, SUBJECT, TO_LIST, FROM, TEXT):
+    if(TO_LIST == None):
+        info('e-mail list empty')
+        error('not sent: ['+TO+'] '+TEXT)
+    else:
+        for TO in TO_LIST:
+            email(HOST, SUBJECT, TO, FROM, TEXT)
+        
+# check if all head node daemons are reported
+def is_all_head_daemons():
+    global state_dict_current
+    global head_daemons
+    debug('states: '+str(state_dict_current))
+    debug('daemons: '+str(head_daemons))
+    for daemon in head_daemons:
+        if(daemon in state_dict_current):
+            debug(daemon+' reporting')
+        else:
+            info(daemon+' not reporting')
+            retVal = False
+            break
+    retVal = True
+    debug('all head daemons: '+str(retVal))   
+    return retVal
+
+# check if only webserver is reported
+def is_only_webserver():
+    global state_dict_current
+    global webserver
+    retVal = False
+    len_cur = len(state_dict_current)
+    if(len_cur == 1):
+        if(webserver in state_dict_current):
+            debug(webserver+' only reporting')
+            retVal = True
+    debug('webserver only: '+str(retVal))        
+    return retVal
+
+# not reportable when ducc boot is in progress
+def is_reportable():
+    global head_daemons
+    global state_dict_current
+    retVal = False
+    if(is_only_webserver()):
+        retVal = True
+    elif(is_all_head_daemons()):
+        retVal = True
+    return retVal
+
+# e-mail message subject
+def get_subject(status):
+    global ducc_url_base
+    subject = 'DUCC'+' '+'status='+status+' '+ducc_url_base
+    return subject
+
+# e-mail message body
+def get_body(text):
+    global name
+    global ducc_url_base
+    sender = get_user()+'@'+get_host()
+    body = '['+sender+']'+' '+name+' '+'reports'+' '+ducc_url_base+' '+'state change:'+' '+text
+    return body
+
+# e-mail state changes, if any
+def email_state_changes():
+    global state_dict_current
+    global state_dict_previous
+    global state_dict_not_up
+    global state_changes_count
+    global mail_host
+    global email_list
+    if(is_reportable()):
+        sum_prv = summarize(state_dict_previous)
+        sum_cur = summarize(state_dict_current)
+        status = sum_cur
+        subject = get_subject(status)
+        if(state_changes_count > 0):
+            info('state_changes(count): '+str(state_changes_count))
+            HOST = mail_host
+            SUBJECT = subject
+            TO_LIST = email_list
+            FROM = get_user()+'@'+get_host()
+            TEXT = get_body(str(state_dict_not_up))
+            email_to_list(HOST, SUBJECT, TO_LIST, FROM, TEXT)
+        else:
+            debug('state_changes(count): '+str(state_changes_count))
+            if(sum_cur == sum_prv):
+                debug('state_current(summary): '+str(sum_cur))
+                debug('state_previous(summary): '+str(sum_prv))
+            else:
+                info('state_current(summary): '+str(sum_cur))
+                info('state_previous(summary): '+str(sum_prv))
+                if(sum_cur == 'up'):
+                    HOST = mail_host
+                    SUBJECT = subject
+                    TO_LIST = email_list
+                    FROM = get_user()+'@'+get_host()
+                    TEXT = get_body('All daemons up')
+                    email_to_list(HOST, SUBJECT, TO_LIST, FROM, TEXT)
+    
+# check for DUCC daemon status changes
+def main(argv):
+    global logger
+    try:
+        logger = logging.getLogger('logger')
+        handler = logging.StreamHandler(sys.stdout)
+        logger.addHandler(handler)
+        parse_cmdline()
+        read_state_previous()
+        fetch_state_current()
+        determine_state_changes() 
+        email_state_changes()
+        update_state_previous()
+    except Exception,e:
+        error('exception in main')
+        exception(e)
+             
+if __name__ == '__main__':
+    main(sys.argv[1:])

Propchange: uima/uima-ducc/trunk/src/main/scripts/tools/ducc_watcher
------------------------------------------------------------------------------
    svn:executable = *