You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2017/01/04 20:37:26 UTC

svn commit: r1777371 - in /uima/uima-ducc/trunk: issuesFixed/ src/main/admin/ src/main/scripts/ src/main/scripts/tools/ uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/ uima-ducc-duccdocs/src/site/tex/duccbook/part4/ uima-ducc-orchestrator/src/main/...

Author: degenaro
Date: Wed Jan  4 20:37:26 2017
New Revision: 1777371

URL: http://svn.apache.org/viewvc?rev=1777371&view=rev
Log:
UIMA-5232 DUCC missing DUCC Book Documentation
> admin/ducc_disk_info
> bin/ducc_status
> bin/ducc_watcher

Added:
    uima/uima-ducc/trunk/src/main/admin/ducc_disk_info   (with props)
    uima/uima-ducc/trunk/src/main/scripts/ducc_status   (with props)
    uima/uima-ducc/trunk/src/main/scripts/ducc_watcher   (with props)
    uima/uima-ducc/trunk/src/main/scripts/ducc_watcher.crontab.example
Removed:
    uima/uima-ducc/trunk/src/main/scripts/tools/
Modified:
    uima/uima-ducc/trunk/issuesFixed/jira-report.html
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-disk-info.tex
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-status.tex
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-watcher.tex
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/install.tex
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ProcessAccounting.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/helper/DiagnosticsHelper.java

Modified: uima/uima-ducc/trunk/issuesFixed/jira-report.html
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/issuesFixed/jira-report.html?rev=1777371&r1=1777370&r2=1777371&view=diff
==============================================================================
--- uima/uima-ducc/trunk/issuesFixed/jira-report.html (original)
+++ uima/uima-ducc/trunk/issuesFixed/jira-report.html Wed Jan  4 20:37:26 2017
@@ -1,5 +1,5 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<!-- Generated by Apache Maven Doxia Site Renderer 1.4 at 2016-12-21 -->
+<!-- Generated by Apache Maven Doxia Site Renderer 1.4 at 2017-01-04 -->
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
     <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
@@ -10,7 +10,7 @@
       @import url("./css/site.css");
     </style>
     <link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
-    <meta name="Date-Revision-yyyymmdd" content="20161221" />
+    <meta name="Date-Revision-yyyymmdd" content="20170104" />
     <meta http-equiv="Content-Language" content="en" />
         
         </head>
@@ -24,7 +24,7 @@
             
                     
                 <div class="xleft">
-        <span id="publishDate">Last Published: 2016-12-21</span>
+        <span id="publishDate">Last Published: 2017-01-04</span>
                   &nbsp;| <span id="projectVersion">Version: ${project.version}</span>
                       </div>
             <div class="xright">        
@@ -161,6 +161,16 @@
 <td>Closed</td>
 <td>UIMA-DUCC: deprecate ducc.agent.node.metrics.sys.gid.max</td></tr>
 <tr class="a">
+<td>Bug</td>
+<td><a class="externalLink" href="https://issues.apache.org/jira/browse/UIMA-5232">UIMA-5232</a></td>
+<td>Closed</td>
+<td>DUCC missing DUCC Book documentation</td></tr>
+<tr class="b">
+<td>Bug</td>
+<td><a class="externalLink" href="https://issues.apache.org/jira/browse/UIMA-5239">UIMA-5239</a></td>
+<td>Closed</td>
+<td>DUCC Web Server (WS) too verbose with logging disk info</td></tr>
+<tr class="a">
 <td>Documentation</td>
 <td><a class="externalLink" href="https://issues.apache.org/jira/browse/UIMA-5158">UIMA-5158</a></td>
 <td>Closed</td>

Added: uima/uima-ducc/trunk/src/main/admin/ducc_disk_info
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/ducc_disk_info?rev=1777371&view=auto
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/ducc_disk_info (added)
+++ uima/uima-ducc/trunk/src/main/admin/ducc_disk_info Wed Jan  4 20:37:26 2017
@@ -0,0 +1,197 @@
+#! /usr/bin/env python
+# -----------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# -----------------------------------------------------------------------
+
+# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# +
+# + ducc_disk_info
+# +
+# + purpose: report on DUCC_HOME disk usage
+# + 
+# + input: none (DUCC_HOME implied from location of this script)
+# + 
+# + output: DUCC_HOME: filesystem=<i>% quota=<j>% (N/A if not determinable)
+# + 
+# + exit code (only when --code is given): the greater of { i, j }
+# +
+# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+import os
+import subprocess
+import sys
+
+from optparse import HelpFormatter
+from optparse import OptionGroup
+from optparse import OptionParser
+
+# ----------------------------------------------
+
+# Extend OptionParser class
+class ExtendedOptionParser(OptionParser):
+    # override epilog formatter so 
+    # that newlines are not deleted!
+    def format_epilog(self, formatter):
+        return self.epilog
+
+# ----------------------------------------------
+
+cmd_df = 'df'
+cmd_quota = 'quota'
+
+code = -1
+message = 'DUCC_HOME: '
+
+# epilog for --help
+def get_epilog():
+    epilog = ''
+    epilog = epilog + '\n'
+    epilog = epilog+'Purpose: display the filesystem and quota usage for the DUCC_HOME directory relative to the location of this script.'
+    epilog = epilog + '\n'
+    return epilog
+
+# parse command line
+def parse_cmdline():
+    global options
+    parser = ExtendedOptionParser(epilog=get_epilog())
+    width = 45
+    parser.formatter.help_position = width
+    parser.formatter.max_help_position = width
+    parser.add_option('--code', action='store_true', dest='flag_code', default=False, 
+        help='display exit code, which is the greater of disk usage percentage and quota usage percentage')
+    parser.add_option('--debug', action='store_true', dest='flag_debug', default=False, 
+        help='display debug information')
+    parser.add_option('--nomsg', action='store_false', dest='flag_msg', default=True, 
+        help='suppress display of filesystem and quota percentages used message')
+    (options, args) = parser.parse_args()
+
+# normalize path to always end with /
+def normalize_path(path):
+    retVal = path
+    if(not path.endswith('/')):
+        retVal = path+'/'
+    return retVal
+
+# normalize quota value to bytes
+def normalize_quota(value):
+    if(value.endswith('M')):
+        tval = value.split('M')[0]
+        retVal = int(tval)*1024*1024
+    elif(value.endswith('k')):
+        tval = value.split('k')[0]
+        retVal = int(tval)*1024
+    else:
+        retVal = int(value)
+    return retVal
+        
+# determine quota usage
+def check_quota():
+    global code
+    global cmd_quota
+    global message
+    global filesystem
+    try:
+        p = subprocess.Popen([cmd_quota, '-A'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        out, err = p.communicate()
+        qfs = None
+        data = None
+        fs1 = normalize_path(filesystem)
+        #print fs1
+        for line in out.splitlines():
+            tokens = line.split()
+            #print tokens
+            if(qfs == None):
+                fs2 = normalize_path(tokens[0])
+                #print fs2
+                if(fs1 == fs2):
+                    qfs = fs1
+            elif(data == None):
+                data = tokens
+                break
+        blocks = normalize_quota(data[0])
+        limit = normalize_quota(data[2])
+        qpct = int(blocks/(limit*1.0)*100)
+        #print qfs, blocks, limit, qpct
+        message = message+'quota='+str(qpct)+'%'+' '
+    except Exception,e:
+        emessage = 'Exception: '+str(e)
+        emessage = emessage.strip()
+        if(options.flag_debug):
+            print emessage
+        message = message+'quota='+'N/A'+' '
+    return
+
+# determine filesystem usage
+def check_disk():
+    global ducc_home
+    global code
+    global cmd_df
+    global message
+    global filesystem
+    try:
+        p = subprocess.Popen([cmd_df, ducc_home], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        out, err = p.communicate()
+        pctused = 0
+        for line in out.splitlines():
+            tokens = line.split()
+        filesystem = tokens[0].strip()
+        pctused = tokens[4].split('%')[0]
+        number = float(pctused)
+        if(pctused > code):
+            code = pctused
+        message = message+'filesystem='+pctused+'%'+' '
+    except Exception,e:
+        emessage = 'Exception: '+str(e)
+        emessage = emessage.strip()
+        if(options.flag_debug):
+            print emessage
+        message = message+'filesystem='+'N/A'+' '
+    return
+
+# process
+def process():
+    check_disk()
+    check_quota()
+    return
+
+# initialize
+def initialize():
+    global ducc_home
+    pathname = os.path.dirname(sys.argv[0])   
+    ducc_home = pathname.rsplit('/',2)[0]
+    parse_cmdline()
+    return
+
+# main
+def main(argv):  
+    global ducc_home
+    global message
+    global code
+    try:
+        initialize()
+        process()
+    except Exception,e:
+        message = 'Exception: '+str(e)
+    message = message.strip()
+    if(options.flag_msg):
+        print message
+    if(options.flag_code):
+        sys.exit(code)
+    
+if __name__ == '__main__':
+    main(sys.argv[1:])

Propchange: uima/uima-ducc/trunk/src/main/admin/ducc_disk_info
------------------------------------------------------------------------------
    svn:executable = *

Added: uima/uima-ducc/trunk/src/main/scripts/ducc_status
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/scripts/ducc_status?rev=1777371&view=auto
==============================================================================
--- uima/uima-ducc/trunk/src/main/scripts/ducc_status (added)
+++ uima/uima-ducc/trunk/src/main/scripts/ducc_status Wed Jan  4 20:37:26 2017
@@ -0,0 +1,328 @@
+#! /usr/bin/env python
+# -----------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# -----------------------------------------------------------------------
+
+# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# +
+# + ducc_status
+# +
+# + purpose: report current ducc daemons status
+# + 
+# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+import datetime
+import sys
+import time
+
+from HTMLParser import HTMLParser
+from optparse import OptionParser
+
+# -----------------------------------------------------------------------
+# Extend OptionParser class
+class ExtendedOptionParser(OptionParser):
+    # override epilog formatter so 
+    # that newlines are not deleted!
+    def format_epilog(self, formatter):
+        return self.epilog
+# -----------------------------------------------------------------------
+# parser for the system.daemons WS page
+class DuccHtmlParser(HTMLParser):
+    
+    tr_state = False
+    daemon_state = None
+    daemon_name = None  
+    daemon_date = None
+    daemon_ip = None
+    daemon_host = None
+    daemons = {}
+    
+    def get_daemons(self):
+        return self.daemons
+    
+    def handle_starttag(self, tag, attrs):
+        if(tag == 'tr' ):
+            self.tr_state = True
+        
+    def handle_endtag(self, tag):   
+        if(tag == 'tr'):
+            self.tr_state = False
+            self.daemon_state = None
+            self.daemon_name = None
+            self.daemon_date = None
+            self.daemon_ip = None
+            self.daemon_host = None
+        
+    def handle_data(self, data):
+        if(self.tr_state):
+            if(self.daemon_state == None):
+                self.daemon_state = data
+            elif(self.daemon_name == None):
+                self.daemon_name = data
+                if(self.daemon_name == 'Agent'):
+                    pass
+                else:
+                    self.daemons[self.daemon_name] = self.daemon_state
+            elif(self.daemon_date == None):
+                self.daemon_date = data
+            elif(self.daemon_ip == None):
+                self.daemon_ip = data
+            elif(self.daemon_host == None):
+                self.daemon_host = data
+                self.daemon_name = data
+                self.daemons[self.daemon_name] = self.daemon_state
+# -----------------------------------------------------------------------
+
+default_host = 'localhost'
+default_port = '42133'
+protocol = 'http://'
+servlet = '/ducc-servlet/classic-system-daemons-data'
+options = None
+
+webserver = 'Webserver'
+head_daemons = [ 'Orchestrator',  'ResourceManager', 'Database', 'Broker', 'ProcessManager', 'ServiceManager', webserver ]
+
+newline = '\n'
+version = '1.0'
+
+# to console
+def to_stdout(text):
+    try:
+        print text
+    except:
+        pass
+    try:
+        sys.stdout.flush()
+    except:
+        pass
+        
+def close():
+    try:
+        sys.stdout.close()
+    except:
+        pass
+    
+# check for log-style formatting of text messages
+def is_log_format():
+    retVal = False
+    if(not options == None):
+        if(options.flag_log_format):
+            retVal = True
+    return retVal
+
+# produce a time stamp
+def get_timestamp():
+    global options
+    tod = time.time()
+    timestamp = datetime.datetime.fromtimestamp(tod).strftime('%Y-%m-%d %H:%M:%S')   
+    return timestamp
+
+# exception
+def exception(e):
+    to_stdout(str(e))
+
+# error message
+def error(text):
+    prefix = ''
+    if(is_log_format()):
+        type = 'E'
+        prefix = get_timestamp()+' '+type+' '
+    line = prefix+text
+    to_stdout(line)
+
+# info message
+def info(text):
+    prefix = ''
+    if(is_log_format()):
+        type = 'I'
+        prefix = get_timestamp()+' '+type+' '
+    line = prefix+text
+    to_stdout(line)
+
+# debug message
+def debug(text):
+    global options
+    if(not options == None):
+        if(options.flag_debug):
+            prefix = ''
+            if(is_log_format()):
+                type = 'D'
+                prefix = get_timestamp()+' '+type+' '
+            line = prefix+text
+            to_stdout(line)
+
+# trace message
+def trace(text):
+    pass
+
+# exit
+def exit(code,msg=True):
+    if(msg):
+        text = 'exit code='+str(code)
+        error(text)
+    sys.exit(code)
+
+# epilog for --help
+def get_epilog():
+    epilog = ''
+    return epilog
+
+# --target host:port of WS for fetching of daemons status
+def validate_target():
+    global options
+    global default_port
+    global protocol
+    global servlet
+    target = options.target
+    if(':' not in target):
+        target = target+':'+default_port
+    if(target.startswith(protocol)):
+        target = target.replace(protocol,'',1)
+    options.ducc_url_base = protocol+target
+    options.ducc_url_servlet = protocol+target+servlet
+    debug('target: '+options.ducc_url_base)
+    
+# parse command line
+def parse_cmdline():
+    global options
+    global default_host
+    global default_port
+    parser = ExtendedOptionParser(epilog=get_epilog())
+    width = 45
+    parser.formatter.help_position = width
+    parser.formatter.max_help_position = width
+    parser.add_option('-a','--agents', action='store_true', dest='flag_agents', default=False, 
+                               help='include agents')
+    parser.add_option('-d','--debug', action='store_true', dest='flag_debug', default=False, 
+                               help='display debugging messages')
+    parser.add_option('-e','--enumerate', action='store_true', dest='flag_enumerate', default=False, 
+                               help='display each individual daemon status')
+    parser.add_option('-l','--log-format', action='store_true', dest='flag_log_format', default=False, 
+                               help='display in log format')
+    parser.add_option('-t','--target', action='store', dest='target', default=default_host+':'+default_port,
+                               help='<host>:<port> with default of '+default_host+':'+default_port)
+    parser.add_option('-v','--version', action='store_true', dest='flag_version', default=False, 
+                               help='display version of this script')
+    (options, args) = parser.parse_args()
+    if(options.flag_version):
+        info('version='+version)
+        exit(1,msg=False)
+    validate_target()
+
+# fetch current daemons state
+def fetch_state_current():
+    global options
+    import urllib2
+    try:
+        opener = urllib2.build_opener()
+        if(options.flag_agents):
+            opener.addheaders.append(('Cookie', 'DUCCagents=show'))
+        response = opener.open(options.ducc_url_servlet)
+        options.ducc_raw_data = response.read()
+        trace(options.ducc_raw_data)
+    except Exception,e:
+        error('unable to fetch data from '+options.ducc_url_servlet)
+        exception(e)
+        exit(1)
+
+# filter non-head daemons
+def filter():
+    global options
+    global head_daemons
+    if(not options.flag_agents):
+        daemons = options.daemons
+        options.daemons = {}
+        for key in daemons:
+            if(key in head_daemons):
+                options.daemons[key] = daemons[key]
+            else:
+                debug('delete '+key)
+
+# transform raw data               
+def transform():
+    global options
+    global head_daemons
+    try:
+        parser = DuccHtmlParser()
+        parser.feed(options.ducc_raw_data)
+        options.daemons = parser.get_daemons()
+        options.daemons_head = {}
+        options.daemons_agent = {}
+        for key in options.daemons:
+            value = options.daemons[key]
+            if(key in head_daemons):
+                options.daemons_head[key] = value
+            else:
+                options.daemons_agent[key] = value
+        debug(str(options.daemons))
+        filter()
+    except Exception,e:
+        error('unable to transform data from '+options.ducc_url_servlet)
+        exception(e)
+        exit(1)
+
+# summarize
+def summarize():
+    global options
+    global head_daemons
+    options.head_up = 0
+    options.head_down = 0
+    options.agent_up = 0
+    options.agent_down = 0
+    for key in options.daemons_head:
+        value = options.daemons_head[key]
+        if(value == 'up'):
+            options.head_up = options.head_up + 1
+        else:
+            options.head_down = options.head_down + 1
+    for key in options.daemons_agent:
+        value = options.daemons_agent[key]
+        if(value == 'up'):
+            options.agent_up = options.agent_up + 1
+        else:
+            options.agent_down = options.agent_down + 1
+
+# display results
+def display():
+    global options
+    global newline
+    summarize();
+    text0 = options.target
+    text1 = 'head: '+'up='+str(options.head_up)+' '+'down='+str(options.head_down)
+    text2 = 'agent: '+'up='+str(options.agent_up)+' '+'down='+str(options.agent_down)
+    if(options.flag_agents):
+        text = text0+' '+text1+' '+text2
+    else:
+        text = text0+' '+text1
+    output = text
+    info(output)
+    if(options.flag_enumerate):
+        for key in sorted(options.daemons):
+            output = key+'='+options.daemons[key]
+            info(output)
+    
+# report current ducc daemons status
+def main(argv):
+    parse_cmdline()
+    fetch_state_current()
+    transform()
+    display()
+    close()
+
+if __name__ == '__main__':
+    main(sys.argv[1:])

Propchange: uima/uima-ducc/trunk/src/main/scripts/ducc_status
------------------------------------------------------------------------------
    svn:executable = *

Added: uima/uima-ducc/trunk/src/main/scripts/ducc_watcher
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/scripts/ducc_watcher?rev=1777371&view=auto
==============================================================================
--- uima/uima-ducc/trunk/src/main/scripts/ducc_watcher (added)
+++ uima/uima-ducc/trunk/src/main/scripts/ducc_watcher Wed Jan  4 20:37:26 2017
@@ -0,0 +1,590 @@
+#! /usr/bin/env python
+# -----------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# -----------------------------------------------------------------------
+
+# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+# +
+# + ducc_watcher
+# +
+# + purpose: send e-mail when a DUCC daemon state changes to not up
+# + 
+# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+import ast
+import datetime
+import getpass
+import json
+import logging
+import logging.handlers
+import os
+import smtplib
+import socket
+import string
+import sys
+import time
+import urllib
+
+from HTMLParser import HTMLParser
+
+from optparse import HelpFormatter
+from optparse import OptionGroup
+from optparse import OptionParser
+
+# ----------------------------------------------
+
+# Extend OptionParser class
+class ExtendedOptionParser(OptionParser):
+    # override epilog formatter so 
+    # that newlines are not deleted!
+    def format_epilog(self, formatter):
+        return self.epilog
+
+# ----------------------------------------------
+
+# parser for the system.daemons WS page
+class DuccHtmlParser(HTMLParser):
+    
+    tr_state = False
+    daemon_state = None
+    daemon_name = None  
+    daemon_date = None
+    daemon_ip = None
+    daemon_host = None
+    daemons = {}
+    
+    def get_daemons(self):
+        return self.daemons
+    
+    def handle_starttag(self, tag, attrs):
+        if(tag == 'tr' ):
+            self.tr_state = True
+        
+    def handle_endtag(self, tag):   
+        if(tag == 'tr'):
+            self.tr_state = False
+            self.daemon_state = None
+            self.daemon_name = None
+            self.daemon_date = None
+            self.daemon_ip = None
+            self.daemon_host = None
+        
+    def handle_data(self, data):
+        if(self.tr_state):
+            if(self.daemon_state == None):
+                self.daemon_state = data
+            elif(self.daemon_name == None):
+                self.daemon_name = data
+                if(self.daemon_name == 'Agent'):
+                    pass
+                else:
+                    self.daemons[self.daemon_name] = self.daemon_state
+            elif(self.daemon_date == None):
+                self.daemon_date = data
+            elif(self.daemon_ip == None):
+                self.daemon_ip = data
+            elif(self.daemon_host == None):
+                self.daemon_host = data
+                self.daemon_name = data
+                self.daemons[self.daemon_name] = self.daemon_state
+
+# ----------------------------------------------
+
+name = 'ducc_watcher'
+
+webserver = 'Webserver'
+head_daemons = [ 'Orchestrator',  'ResourceManager', 'Database', 'Broker', 'ProcessManager', 'ServiceManager', webserver ]
+
+flag_info = True
+flag_trace = False
+logger = None
+
+port = '42133'
+
+path = None
+log_file = None
+state_file = None
+
+flag_agents = False
+
+mail_host = 'localhost'
+email_list = None
+
+# produce a time stamp
+def get_timestamp():
+    tod = time.time()
+    timestamp = datetime.datetime.fromtimestamp(tod).strftime('%Y-%m-%d %H:%M:%S')         
+    return timestamp
+
+# get the host running this script
+def get_host():
+    host = socket.gethostname()
+    return host
+
+# get the user running this script
+def get_user():
+    user = getpass.getuser()
+    return user
+
+# make directories, if not already existing
+def mkdirs(path):
+    debug('mkdirs: path='+path)
+    if(os.path.exists(path)):
+        return
+    try:
+        os.makedirs(path)
+    except Exception,e:
+        exception(e)
+    
+# info message to log
+def info(text):
+    global logger
+    type = 'I'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    logger.info(line)
+    return line
+
+# trace message to log
+def trace(text):
+    global logger
+    global flag_trace
+    type = 'T'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    if(flag_trace):
+        logger.debug(line)
+    return line
+
+# debug message to log
+def debug(text):
+    global logger
+    type = 'D'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    logger.debug(line)
+    return line
+
+# error message to log
+def error(text):
+    global logger
+    type = 'E'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    logger.error(line)
+    return line
+
+# warn message to log
+def warn(text):
+    global logger
+    type = 'W'
+    line = get_timestamp()+' '+get_user()+'@'+get_host()+' '+type+' '+text
+    logger.warn(line)
+    return line
+
+# exit
+def exit(code):
+    text = 'exit code='+str(code)
+    email(text)
+    error(text)
+    sys.exit(code)
+
+# exception
+def exception(e):
+    line = error(str(e))
+    return line
+    
+# epilog for --help
+def get_epilog():
+    epilog = ''
+    return epilog
+
+# debug is normally not set
+def validate_debug(options):
+    global logger
+    if(options.flag_debug):
+        logger.setLevel(logging.DEBUG)
+    else:
+        logger.setLevel(logging.INFO)
+
+# consider head node daemons only
+# unless --agents is specified
+def validate_agents(options):
+    global flag_agents
+    if(options.flag_agents):
+        flag_agents = True
+    
+# use /tmp/<userid> as log+state directory 
+# unless --path is specified
+def validate_path(options):
+    if(options.path == None):
+        options.path = '/tmp'+'/'+get_user()
+    mkdirs(options.path)
+
+# set up a rotating log file handler that keeps
+# 8 backups of up to 8 MiB each, with base name
+# ducc_watcher.<target>.log
+def setup_log_file(options):  
+    global name
+    global target
+    global logger
+    log_file = options.path
+    if(not log_file.endswith('/')):
+        log_file = log_file + '/'
+    log_file = log_file + name + '.' + target +'.log'
+    handler = logging.handlers.RotatingFileHandler(
+        log_file, maxBytes=8*1024*1024, backupCount=8)
+    logger.addHandler(handler)
+    debug('log_file: '+log_file)
+
+# ducc_watcher.<target>.state
+def setup_state_file(options): 
+    global name 
+    global target
+    global state_file
+    state_file = options.path
+    if(not state_file.endswith('/')):
+        state_file = state_file + '/'
+    state_file = state_file + name + '.' + target +'.state'
+    debug('state_file: '+state_file)
+
+# --target <host>[:<port>] of the WS is required for fetching
+# daemon status; the default port is appended when none is given
+def validate_target(options):
+    global port
+    global target
+    global ducc_url_base
+    global ducc_url_servlet
+    protocol = 'http://'
+    servlet = '/ducc-servlet/classic-system-daemons-data'
+    if(options.target == None):
+        error('required "target" not specified')
+        exit(1)
+    target = options.target
+    if(':' not in target):
+        target = target+':'+str(port)
+    if(target.startswith(protocol)):
+        target = target.replace(protocol,'',1)
+    ducc_url_base = protocol+target
+    ducc_url_servlet = protocol+target+servlet
+    debug('target: '+ducc_url_base)
+    
+# list of e-mail recipients, if any
+def validate_email_list(options):
+    global email_list
+    if(not options.email_list == None):
+        email_list = options.email_list.split()
+    debug('email-list: '+str(email_list))
+
+# parse command line
+def parse_cmdline():
+    global name
+    parser = ExtendedOptionParser(epilog=get_epilog())
+    width = 45
+    parser.formatter.help_position = width
+    parser.formatter.max_help_position = width
+    parser.add_option('-a','--agents', action='store_true', dest='flag_agents', default=False, 
+                               help='include agents')
+    parser.add_option('-d','--debug', action='store_true', dest='flag_debug', default=False, 
+                               help='display debugging messages')
+    parser.add_option('-e','--email-list', action='store', dest='email_list', default=None, 
+                               help='blank separated list of email addresses to receive down + error notifications')
+    parser.add_option('-p','--path', action='store', dest='path', default=None,
+                               help='path to directory where log and state information are written, default is /tmp'+'/'+get_user())
+    parser.add_option('-t','--target', action='store', dest='target', default=None,
+                               help='[REQUIRED] <host> with default port of '+port+' or <host>:<port>')
+
+    (options, args) = parser.parse_args()
+    # -d
+    validate_debug(options)
+    # -t
+    validate_target(options)
+    # -e
+    validate_email_list(options)
+    # -p
+    validate_path(options)
+    # dependencies
+    setup_log_file(options)
+    setup_state_file(options)
+    # -a
+    validate_agents(options)
+
+# determine if named daemon is one of the head node ones
+def is_head(key):
+    global head_daemons
+    retVal = False
+    if(key in head_daemons):
+        retVal = True   
+    return retVal
+
+# get rid of noise. remove an entry if
+# 1. its state is unknown, or
+# 2. it is an agent and agents are not wanted
+def filter(state_dict):
+    global flag_agents
+    retVal = {}
+    for key in state_dict:
+        if(state_dict[key] == 'unknown'):
+            pass
+        else:
+            if(is_head(key)):
+                retVal[key] = state_dict[key]
+            elif(flag_agents):
+                retVal[key] = state_dict[key]
+    return retVal
+
+# summarize state of all ducc daemons
+def summarize(state_dict):
+    global head_daemons
+    retVal = 'up'
+    if(len(state_dict) < len(head_daemons)):
+        retVal = 'down'
+    else:
+        for key in state_dict:
+            if(not state_dict[key] == 'up'):
+                retVal = 'down'
+                break;
+    return retVal
+
+# read previous daemons state:
+# load the dictionary literal stored in state_file into state_dict_previous
+# and filter it; on any failure the error is logged and state_dict_previous
+# keeps whatever was assigned so far (initially an empty dictionary)
+def read_state_previous():
+    global state_dict_previous
+    global state_file
+    state_dict_previous = {}
+    try:
+        with open(state_file, 'r') as f:
+            s = f.read()
+            # the file holds the str() form of a dictionary, see write_state_previous
+            state_dict_previous = ast.literal_eval(s)
+            debug('state_previous(read): '+str(state_dict_previous))
+            state_dict_previous = filter(state_dict_previous)
+            debug('state_previous(filter): '+str(state_dict_previous))
+    except Exception,e:
+        error('unable to read state from '+state_file)
+        exception(e)
+
+# write previous daemons state:
+# store str(state_dict_previous) followed by a newline in state_file;
+# a failure is logged and otherwise ignored
+def write_state_previous():
+    global state_dict_previous
+    global state_file
+    try:
+        # the file is opened in 'w' mode; seek and truncate are kept as written
+        with open(state_file, 'w') as f:
+            f.seek(0)
+            f.write(str(state_dict_previous)+'\n')
+            f.truncate()
+            debug('state_previous(write): '+str(state_dict_previous))
+    except Exception,e:
+        error('unable to write state to '+state_file)
+        exception(e)
+
+# current becomes previous daemons state
+def update_state_previous():
+    global state_dict_previous
+    global state_dict_current
+    state_dict_previous = state_dict_current
+    write_state_previous()
+
+# fetch current daemons state:
+# read the page at ducc_url_servlet, extract the daemons and their status
+# with DuccHtmlParser, and keep the filtered result in state_dict_current;
+# if anything fails the webserver is recorded as 'unreachable'
+def fetch_state_current():
+    global flag_agents
+    global state_dict_current
+    global ducc_url_servlet
+    global webserver
+    state_dict_current = {}
+    try:
+        # imported inside the try, so an import failure is handled like a fetch failure
+        import urllib2
+        opener = urllib2.build_opener()
+        if(flag_agents):
+            # NOTE(review): presumably asks the web server to include agents - confirm
+            opener.addheaders.append(('Cookie', 'DUCCagents=show'))
+        response = opener.open(ducc_url_servlet)
+        data = response.read()
+        parser = DuccHtmlParser()
+        parser.feed(data)
+        daemons = parser.get_daemons()
+        if(daemons == None):
+            debug('daemons is None')
+        else:
+            # copy each daemon and its status into the current state
+            for daemon in daemons:
+                status = daemons[daemon]
+                trace(daemon+':'+' '+status+' ')
+                state_dict_current[daemon] = status
+            debug('state_current(read): '+str(state_dict_current))
+            state_dict_current = filter(state_dict_current)
+            debug('state_current(filter): '+str(state_dict_current))
+    except Exception,e:
+        # force WS status to a not-up value whenever contact fails
+        daemon = webserver
+        status = 'unreachable'
+        state_dict_current[daemon] = status
+        error('unable to fetch data from '+ducc_url_servlet)
+        exception(e)
+    debug('state_current: '+str(state_dict_current))
+
+# determine state changes between previous and current
+def determine_state_changes():
+    global state_dict_current
+    global state_dict_previous
+    global state_dict_not_up
+    global state_changes_count
+    state_changes_count = 0
+    state_dict_not_up = {}
+    for key in state_dict_current:
+        state_current = state_dict_current.get(key, '?')
+        if(state_current == 'up'):
+            pass
+        else:
+            state_dict_not_up[key] = state_current
+            state_previous = state_dict_previous.get(key, '?')
+            if(state_current == state_previous):
+                pass
+            else:
+                state_changes_count = state_changes_count + 1
+                info(key+' '+'from'+' '+state_previous+' '+'to'+' '+state_current)
+                
+# send email to a single address:
+# HOST is handed to smtplib.SMTP, TO is one recipient address, FROM is the
+# sender address, SUBJECT and TEXT become the subject header and the body;
+# success or failure is logged, a failure is not raised to the caller
+def email(HOST, SUBJECT, TO, FROM, TEXT):
+    try:
+        # headers, an empty separator line, then the text, joined by CRLF
+        BODY = string.join((
+            "From: %s" % FROM,
+            "To: %s" % TO,
+            "Subject: %s" % SUBJECT ,
+            "",
+            TEXT
+            ), "\r\n")
+        server = smtplib.SMTP(HOST)
+        server.sendmail(FROM, [TO], BODY)
+        server.quit()
+        info('sent: ['+TO+'] '+TEXT)
+    except Exception,e:
+        error('not sent: ['+TO+'] '+TEXT)
+        exception(e)
+
+# send email
+def email_to_list(HOST, SUBJECT, TO_LIST, FROM, TEXT):
+    if(TO_LIST == None):
+        info('e-mail list empty')
+    else:
+        for TO in TO_LIST:
+            email(HOST, SUBJECT, TO, FROM, TEXT)
+        
+# check if all head node daemons are reported
+def is_all_head_daemons():
+    global state_dict_current
+    global head_daemons
+    debug('states: '+str(state_dict_current))
+    debug('daemons: '+str(head_daemons))
+    for daemon in head_daemons:
+        if(daemon in state_dict_current):
+            debug(daemon+' reporting')
+        else:
+            info(daemon+' not reporting')
+            retVal = False
+            break
+    retVal = True
+    debug('all head daemons: '+str(retVal))   
+    return retVal
+
+# check if only webserver is reported
+def is_only_webserver():
+    global state_dict_current
+    global webserver
+    retVal = False
+    len_cur = len(state_dict_current)
+    if(len_cur == 1):
+        if(webserver in state_dict_current):
+            debug(webserver+' only reporting')
+            retVal = True
+    debug('webserver only: '+str(retVal))        
+    return retVal
+
+# not reportable when ducc boot is in progress
+def is_reportable():
+    global head_daemons
+    global state_dict_current
+    retVal = False
+    if(is_only_webserver()):
+        retVal = True
+    elif(is_all_head_daemons()):
+        retVal = True
+    return retVal
+
+# e-mail message subject
+def get_subject(status):
+    global ducc_url_base
+    subject = 'DUCC'+' '+'status='+status+' '+ducc_url_base
+    return subject
+
+# e-mail message body
+def get_body(text):
+    global name
+    global ducc_url_base
+    sender = get_user()+'@'+get_host()
+    body = '['+sender+']'+' '+name+' '+'reports'+' '+ducc_url_base+' '+'state change:'+' '+text
+    return body
+
+# e-mail state changes, if any
+def email_state_changes():
+    global state_dict_current
+    global state_dict_previous
+    global state_dict_not_up
+    global state_changes_count
+    global mail_host
+    global email_list
+    if(is_reportable()):
+        sum_prv = summarize(state_dict_previous)
+        sum_cur = summarize(state_dict_current)
+        status = sum_cur
+        subject = get_subject(status)
+        if(state_changes_count > 0):
+            info('state_changes(count): '+str(state_changes_count))
+            HOST = mail_host
+            SUBJECT = subject
+            TO_LIST = email_list
+            FROM = get_user()+'@'+get_host()
+            TEXT = get_body(str(state_dict_not_up))
+            email_to_list(HOST, SUBJECT, TO_LIST, FROM, TEXT)
+        else:
+            debug('state_changes(count): '+str(state_changes_count))
+            if(sum_cur == sum_prv):
+                debug('state_current(summary): '+str(sum_cur))
+                debug('state_previous(summary): '+str(sum_prv))
+            else:
+                info('state_current(summary): '+str(sum_cur))
+                info('state_previous(summary): '+str(sum_prv))
+                if(sum_cur == 'up'):
+                    HOST = mail_host
+                    SUBJECT = subject
+                    TO_LIST = email_list
+                    FROM = get_user()+'@'+get_host()
+                    TEXT = get_body('All daemons up')
+                    email_to_list(HOST, SUBJECT, TO_LIST, FROM, TEXT)
+    
+# check for DUCC daemon status changes:
+# set up logging to stdout, parse the command line, then compare the saved
+# previous state with the freshly fetched current state, e-mail any changes
+# and save the current state for the next run.
+# argv is accepted but not used here; any exception is logged, not raised
+def main(argv):
+    global logger
+    try:
+        logger = logging.getLogger('logger')
+        handler = logging.StreamHandler(sys.stdout)
+        logger.addHandler(handler)
+        parse_cmdline()
+        read_state_previous()
+        fetch_state_current()
+        determine_state_changes() 
+        email_state_changes()
+        # done last, so the e-mail step above still sees the old previous state
+        update_state_previous()
+    except Exception,e:
+        error('exception in main')
+        exception(e)
+             
+# run main with the command line arguments when executed as a script
+if __name__ == '__main__':
+    main(sys.argv[1:])

Propchange: uima/uima-ducc/trunk/src/main/scripts/ducc_watcher
------------------------------------------------------------------------------
    svn:executable = *

Added: uima/uima-ducc/trunk/src/main/scripts/ducc_watcher.crontab.example
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/scripts/ducc_watcher.crontab.example?rev=1777371&view=auto
==============================================================================
--- uima/uima-ducc/trunk/src/main/scripts/ducc_watcher.crontab.example (added)
+++ uima/uima-ducc/trunk/src/main/scripts/ducc_watcher.crontab.example Wed Jan  4 20:37:26 2017
@@ -0,0 +1,25 @@
+
+# Example crontab to run ducc_watcher, which checks DUCC daemon status and e-mails state changes
+
+# -----------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# -----------------------------------------------------------------------
+
+# Once per minute
+
+*/1 * * * * /home/ducc/ducc_runtime/bin/ducc_watcher --target http://uima-ducc-demo.apache.org:42 --email-list "user1@host1 user2@host2"
\ No newline at end of file

Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-disk-info.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-disk-info.tex?rev=1777371&r1=1777370&r2=1777371&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-disk-info.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-disk-info.tex Wed Jan  4 20:37:26 2017
@@ -21,7 +21,7 @@
 \else
 \HCode{<a name='DUCC_DISK_INFO'></a>}
 \fi
-    \section{tools/ducc\_disk\_info}
+    \section{ducc\_disk\_info}
     \label{sec:cli.tools-ducc-disk-info}    
 
   \paragraph{Description:}
@@ -29,7 +29,7 @@
     
   \paragraph{Usage:}
     \begin{description}
-    \item[Script] \ducchome/bin/tools/ducc\_disk\_info {\em options}
+    \item[Script] \ducchome/bin/ducc\_disk\_info {\em options}
     \end{description}
     
     \paragraph{Options:}

Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-status.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-status.tex?rev=1777371&r1=1777370&r2=1777371&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-status.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-status.tex Wed Jan  4 20:37:26 2017
@@ -21,7 +21,7 @@
 \else
 \HCode{<a name='DUCC_STATUS'></a>}
 \fi
-    \section{tools/ducc\_status}
+    \section{ducc\_status}
     \label{sec:cli.tools-ducc-status}    
 
   \paragraph{Description:}
@@ -29,7 +29,7 @@
     
   \paragraph{Usage:}
     \begin{description}
-    \item[Script] \ducchome/bin/tools/ducc\_status {\em options}
+    \item[Script] \ducchome/bin/ducc\_status {\em options}
     \end{description}
     
     \paragraph{Options:}

Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-watcher.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-watcher.tex?rev=1777371&r1=1777370&r2=1777371&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-watcher.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/tools-ducc-watcher.tex Wed Jan  4 20:37:26 2017
@@ -21,7 +21,7 @@
 \else
 \HCode{<a name='DUCC_WATCHER'></a>}
 \fi
-    \section{tools/ducc\_watcher}
+    \section{ducc\_watcher}
     \label{sec:cli.tools-ducc-watcher}    
 
   \paragraph{Description:}
@@ -29,7 +29,7 @@
    
   \paragraph{Usage:}
     \begin{description}
-    \item[Script] \ducchome/bin/tools/ducc\_watcher {\em options}
+    \item[Script] \ducchome/bin/ducc\_watcher {\em options}
     \end{description}
     
   \paragraph{Options:}

Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/install.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/install.tex?rev=1777371&r1=1777370&r2=1777371&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/install.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/install.tex Wed Jan  4 20:37:26 2017
@@ -593,7 +593,7 @@ Red Hat Enterprise Linux Workstation rel
 
 \begin{verbatim}
 
-$DUCC_HOME/bin/tools/ducc_watcher is a Python script that, when run, contacts the 
+$DUCC_HOME/bin/ducc_watcher is a Python script that, when run, contacts the 
 DUCC Web Server to fetch data and determine the status of the critical head node daemons.
 It can be run as a cron job to detect down daemons and send email notifications
 to a list of receipients specified via command invocation option.

Modified: uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ProcessAccounting.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ProcessAccounting.java?rev=1777371&r1=1777370&r2=1777371&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ProcessAccounting.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ProcessAccounting.java Wed Jan  4 20:37:26 2017
@@ -174,7 +174,8 @@ public class ProcessAccounting {
 					//OK
 				}
 				else {
-					logger.warn(methodName, dw.getDuccId(), inventoryProcess.getDuccId(), "PID"+" "+"old:"+oldValue+" "+"new:"+newValue+" "+"keeping old");
+					logger.warn(methodName, dw.getDuccId(), inventoryProcess.getDuccId(), "PID"+" "+"old:"+oldValue+" "+"new:"+newValue+" "+"replacing old");
+					process.setPID(newValue);
 				}
 			}
 		}

Modified: uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/helper/DiagnosticsHelper.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/helper/DiagnosticsHelper.java?rev=1777371&r1=1777370&r2=1777371&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/helper/DiagnosticsHelper.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/helper/DiagnosticsHelper.java Wed Jan  4 20:37:26 2017
@@ -45,8 +45,6 @@ public class DiagnosticsHelper {
 							+File.separator
 							+"bin"
 							+File.separator
-							+"tools"
-							+File.separator
 							+"ducc_disk_info"
 							;
 				String[] command = { path };