You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2018/10/12 12:10:47 UTC
svn commit: r1843654 - /uima/uima-ducc/trunk/src/main/admin/stop_ducc
Author: degenaro
Date: Fri Oct 12 12:10:47 2018
New Revision: 1843654
URL: http://svn.apache.org/viewvc?rev=1843654&view=rev
Log:
UIMA-5875 DUCC admin command stop_ducc should accept --all --stop, which comprises --agents followed by --head
--all --stop will now stop all daemons listed in the DB, including remote head node daemons.
However, the command runs slowly because a new JVM is launched to update the DB for each daemon. This looks like an opportunity for improvement.
Modified:
uima/uima-ducc/trunk/src/main/admin/stop_ducc
Modified: uima/uima-ducc/trunk/src/main/admin/stop_ducc
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/stop_ducc?rev=1843654&r1=1843653&r2=1843654&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/stop_ducc (original)
+++ uima/uima-ducc/trunk/src/main/admin/stop_ducc Fri Oct 12 12:10:47 2018
@@ -229,7 +229,7 @@ class StopDucc(DuccUtil):
epilog = epilog+'> '+self._fn()+' '+self.option_component+' '+self.c_or+'@nodeX3'+' '+self.option_kill
return epilog
- help_all = 'Stop all DUCC management and agent processes by using database entries recorded by start_ducc. Only allowed if '+option_kill+' option is also specified.'
+ help_all = 'Stop all DUCC management and agent processes by using database entries recorded by start_ducc.'
help_head = 'Stop the DUCC management processes on the present head node by using database entries recorded by start_ducc.'
help_agents = 'Stop the DUCC agents processes on all nodes by using database entries recorded by '+cmd_start_ducc+'.'
help_nodelist = 'Stop agents on the nodes in the nodefile. Multiple nodefiles may be specified.'
@@ -239,7 +239,7 @@ class StopDucc(DuccUtil):
+ ' Specification of broker or database is disallowed unless that component is automanaged by '+kw_DUCC+'.'
help_kill = 'Stop the component(s) forcibly and immediately using '+cmd_ssh+' with '+cmd_kill_9+'. Use this only if a normal stop does not work (e.g. the process may be hung).'
help_stop = 'Stop the component(s) gracefully using broadcast. Agents allow children specified time (in seconds) to exit. Default is '+str(default_stop)+'.'\
- + ' Broadcast is not used for broker and database, instead a direct kill -15 is employed.'
+ + ' Broadcast is not used for broker, database, and remote head node daemons; instead a direct kill -15 is employed.'
help_quiesce = 'Stop the component(s) gracefully using broadcast. Agents exit only when no children exist. Children are given infinite time to exit.'
help_maxthreads = 'Maximum concurrent threads. Default = '+str(maxthreads)+'.'
help_debug = 'Display debugging messages.'
@@ -267,8 +267,9 @@ class StopDucc(DuccUtil):
elif(self.args.stop):
if(self.args.maxthreads == None):
self.args.maxthreads = 2
- else:
- self.parser.error(self.option_maxthreads+' requires '+self.option_kill)
+ elif(self.args.quiesce_then_stop):
+ if(self.args.maxthreads == None):
+ self.args.maxthreads = 2
elif(self.args.maxthreads != None):
self.parser.error(self.option_maxthreads+' requires '+self.option_kill)
# debug
@@ -285,6 +286,8 @@ class StopDucc(DuccUtil):
def get_db_list(self):
if(self.db_list == None):
self.db_list = self.db_acct_query()
+ text = 'list='+str(list)
+ debug(self._mn(),text)
return self.db_list
# --all
@@ -442,9 +445,13 @@ class StopDucc(DuccUtil):
break
return pid
+ def acct_stop(self,node,component):
+ print 'stop: '+component+'@'+node
+ self.db_acct_stop(node,component)
+
# target=kill
def kill(self,count,tid,node,component,signal):
- self.db_acct_stop(node,component)
+ self.acct_stop(node,component)
verbosity=False
ssh = self.ssh_operational(node,verbosity)
state = 'state=pending'
@@ -472,7 +479,7 @@ class StopDucc(DuccUtil):
self.pool.release()
# launch threads to perform kills
- def kill_threads(self,list):
+ def kill_threads(self,list,signal):
size = len(list)
msg = 'daemons='+str(len(list))
output(msg)
@@ -486,25 +493,42 @@ class StopDucc(DuccUtil):
count = count+1
self.pool.acquire()
tid = self.get_tid()
- signal = self.sig9
t = Thread(target=self.kill, args=(count,tid,node,component,signal))
t.start()
-
+
+ def filter_remote_head(self,list):
+ list_remote_head = []
+ list_remainder = []
+ this_node = self.get_node_name()
+ for item in list:
+ node = item[0]
+ com = item[1]
+ if(com == self.c_ag):
+ list_remainder.append(item)
+ elif(node == this_node):
+ list_remainder.append(item)
+ else:
+ list_remote_head.append(item)
+ return list_remote_head, list_remainder
+
# target=stop
- def stop(self,list):
+ def stop(self,list,qflag):
text = 'list='+str(list)
debug(self._mn(),text)
- # validate
- self.head_on_node_only(list)
+ # get 2 lists
+ list_remote_head, list = self.filter_remote_head(list)
+ # stop remote head(s)
+ if(len(list_remote_head)>0):
+ signal = self.sig15
+ self.kill_threads(list_remote_head, signal)
# update database + build admin string
- self.threads_prep()
admin = ''
stop_db = False
stop_broker = False
for item in list:
node = item[0]
com = item[1]
- self.db_acct_stop(node,com)
+ self.acct_stop(node,com)
component = self.longname[com]
if(component == self.c_broker):
stop_broker = True
@@ -515,13 +539,16 @@ class StopDucc(DuccUtil):
else:
component = self.longname[com]
admin = admin+component+'@'+node+' '
- self.db_acct_stop(node,component)
# issue command
admin = admin.strip()
if(len(admin) > 0):
admin = str(self.args.stop)+' '+admin
- print "stop: "+admin
- self.ducc_admin('--stop',admin)
+ if(qflag):
+ print 'quiesce: '+admin
+ self.ducc_admin('--quiesce',admin)
+ else:
+ print 'stop: '+admin
+ self.ducc_admin('--stop',admin)
# stop broker
if(stop_broker):
self.stop_broker()
@@ -529,37 +556,6 @@ class StopDucc(DuccUtil):
if(stop_db):
self.db_stop()
- # target=quiesce
- def quiesce(self,list):
- text = 'list='+str(list)
- debug(self._mn(),text)
- # validate
- self.agent_only(list)
- # update database + build admin string
- admin = ''
- for item in list:
- node = item[0]
- com = item[1]
- self.db_acct_stop(node,com)
- component = self.longname[com]
- admin = admin+component+'@'+node+' '
- # issue command
- admin = admin.strip()
- if(len(admin) > 0):
- print "quiesce: "+admin
- self.ducc_admin('--quiesce',admin)
-
- # only head node component on present node allowed
- def head_on_node_only(self,list):
- head = self.get_node_name()
- for item in list:
- component = item[1]
- if(component != self.n_ag):
- node = item[0]
- if(node != head):
- 'invalid node='+node+' for component='+component
- self._exit()
-
# only agent component allowed
def agent_only(self,list):
for item in list:
@@ -599,14 +595,10 @@ class StopDucc(DuccUtil):
# main
def main(self,argv):
self.get_args()
+ self.threads_prep()
# get list of nodes+daemons
if(self.args.all):
- if(self.args.kill):
- list = self.all()
- else:
- msg = 'cannot specify '+self.option_all+' unless '+self.option_kill+' is also specified.'
- output(msg)
- self._exit()
+ list = self.all()
elif(self.args.head):
list = self.head()
elif(self.args.agents):
@@ -625,12 +617,12 @@ class StopDucc(DuccUtil):
self.enforce_location_limits(list)
# perform action
if(self.args.kill):
- self.threads_prep()
- self.kill_threads(list)
+ signal = self.sig9
+ self.kill_threads(list,signal)
elif(self.args.stop != None):
- self.stop(list)
+ self.stop(list,False)
elif(self.args.quiesce_then_stop):
- self.quiesce(list)
+ self.stop(list,True)
else:
self._help()