You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by ja...@apache.org on 2013/11/26 05:36:09 UTC
git commit: updated refs/heads/4.3 to 8a29b70
Updated Branches:
refs/heads/4.3 483f304ef -> 8a29b7000
Monitoring python script organized into more methods
Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/8a29b700
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/8a29b700
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/8a29b700
Branch: refs/heads/4.3
Commit: 8a29b7000c01d0516824d33074d1c42f64c78b9e
Parents: 483f304
Author: Jayapal <ja...@apache.org>
Authored: Tue Nov 26 09:49:22 2013 +0530
Committer: Jayapal <ja...@apache.org>
Committed: Tue Nov 26 09:49:22 2013 +0530
----------------------------------------------------------------------
.../debian/config/root/monitorServices.py | 159 ++++++++++---------
1 file changed, 81 insertions(+), 78 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cloudstack/blob/8a29b700/systemvm/patches/debian/config/root/monitorServices.py
----------------------------------------------------------------------
diff --git a/systemvm/patches/debian/config/root/monitorServices.py b/systemvm/patches/debian/config/root/monitorServices.py
index 4e1b7e0..3646c81 100755
--- a/systemvm/patches/debian/config/root/monitorServices.py
+++ b/systemvm/patches/debian/config/root/monitorServices.py
@@ -135,6 +135,49 @@ def isPidMatchPidFile(pidfile, pids):
fd.close()
return StatusCodes.FAILED
+def checkProcessRunningStatus(process_name, pidFile):
+ printd("checking the process " + process_name)
+ cmd = ''
+ pids = []
+ cmd = 'pidof ' + process_name
+ printd(cmd)
+
+ #cmd = 'service ' + process_name + ' status'
+ pout = Popen(cmd, shell=True, stdout=PIPE)
+ exitStatus = pout.wait()
+ temp_out = pout.communicate()[0]
+
+ #check there is only one pid or not
+ if exitStatus == 0:
+ pids = temp_out.split(' ')
+ printd("pid(s) of process %s are %s " %(process_name, pids))
+
+ #there is more than one process so match the pid file
+ #if not matched set pidFileMatched=False
+ printd("Checking pid file")
+ if isPidMatchPidFile(pidFile, pids) == StatusCodes.SUCCESS:
+ return True,pids;
+
+ printd("pid of exit status %s" %exitStatus)
+
+ return False,pids;
+
+def restartService(service_name):
+
+ cmd = 'service ' + service_name + ' restart'
+ cout = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
+ return_val = cout.wait()
+
+ if return_val == 0:
+ printd("The service " + service_name +" recovered successfully ")
+ msg="The process " +service_name+" is recovered successfully "
+ raisealert(Log.INFO,msg,service_name)
+ return True
+ else:
+ printd("process restart failed ....")
+
+ return False
+
def checkProcessStatus( process ):
@@ -152,56 +195,28 @@ def checkProcessStatus( process ):
if process_name is None:
printd ("\n Invalid Process Name")
return StatusCodes.INVALID_INP
- else:
- printd("checking the process " + process_name)
- cmd = 'pidof ' + process_name
- printd(cmd)
- #cmd = 'service ' + process_name + ' status'
- pout = Popen(cmd, shell=True, stdout=PIPE)
- exitStatus = pout.wait()
- temp_out = pout.communicate()[0]
-
- #check there is only one pid or not
- if exitStatus == 0:
- pids = temp_out.split(' ')
- msg="pids: " +temp_out;
- printd(msg)
- #there is more than one process so match the pid file
- #if not matched set pidFileMatched=False
- printd("Checking pid file")
- if isPidMatchPidFile(pidfile, pids) == StatusCodes.SUCCESS:
- pidFileMatched = True;
- else:
- pidFileMatched = False;
+ status, pids = checkProcessRunningStatus(process_name, pidfile)
- if exitStatus == 0 and pidFileMatched == True:
+ if status == True:
printd("The process is running ....")
return StatusCodes.RUNNING
else:
- printd('exit status:'+str(exitStatus))
- msg="The process " + process_name +" is not running trying recover "
- printd(msg)
+ printd("Process %s is not running trying to recover" %process_name)
#Retry the process state for few seconds
+
for i in range(1, Config.RETRY_ITERATIONS):
- pout = Popen(cmd, shell=True, stdout=PIPE)
- exitStatus = pout.wait()
- temp_out = pout.communicate()[0]
+ time.sleep(Config.SLEEP_SEC)
if i < Config.RETRY_FOR_RESTART: # this is just for trying few more times
- if exitStatus == 0:
- pids = temp_out.split(' ')
-
- if isPidMatchPidFile(pidfile, pids) == StatusCodes.SUCCESS:
- pidFileMatched = True;
- printd("pid file is matched ...")
- raisealert(Log.ALERT, "The process detected as running", process_name)
- break
- else:
- printd("pid file is not matched ...")
- pidFileMatched = False;
- time.sleep(Config.SLEEP_SEC)
- continue
+
+ status, pids = checkProcessRunningStatus(process_name, pidfile)
+ if status == True:
+ raisealert(Log.ALERT, "The process detected as running", process_name)
+ break
+ else:
+ printd("Process %s is not running checking the status again..." %process_name)
+ continue
else:
msg="The process " +process_name+" is not running trying recover "
raisealert(Log.INFO,process_name,msg)
@@ -213,25 +228,10 @@ def checkProcessStatus( process ):
printd(cmd)
Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
- cmd = 'service ' + service_name + ' restart'
-
- time.sleep(Config.SLEEP_SEC)
- #return_val= check_call(cmd , shell=True)
-
- cout = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
- return_val = cout.wait()
-
- if return_val == 0:
- printd("The process" + process_name +" recovered successfully ")
- msg="The process " +process_name+" is recovered successfully "
- raisealert(Log.INFO,msg,process_name)
-
- break;
+ if restartService(service_name) == True:
+ break
else:
- #retry restarting the process for few tries
- printd("process restart failing trying again ....")
- restartFailed=True
- time.sleep(Config.SLEEP_SEC)
+ restartFailed = True
continue
#for end here
@@ -255,6 +255,7 @@ def monitProcess( processes_info ):
dict_unmonit={}
umonit_update={}
+ unMonitPs=False
if not path.isfile(Config.UNMONIT_PS_FILE):
printd('Unmonit File not exist')
@@ -265,42 +266,48 @@ def monitProcess( processes_info ):
#time for noting process down time
csec = repr(time.time()).split('.')[0]
- unMonitPs=False
-
for process,properties in processes_info.items():
#skip the process it its time stamp less than Config.MONIT_AFTER_MINS
- printd ("checking the process %s \n" %process)
+ printd ("checking the service %s \n" %process)
if not is_emtpy(dict_unmonit):
if dict_unmonit.has_key(process):
ts = dict_unmonit[process]
- printd("Time difference=%s" %str(int(csec) - int(ts)))
- tmin = (int(csec) - int(ts) )/60
- if ( int(csec) - int(ts) )/60 < Config.MONIT_AFTER_MINS:
- raisealert(Log.ALERT, "The %s get monitor after %s minutes " %(process, Config.MONIT_AFTER_MINS))
- printd('process will be monitored after %s min' %(str(int(Config.MONIT_AFTER_MINS) - tmin)))
- unMonitPs=True
+ if checkPsTimeStampForMonitor (csec, ts, properties) == False:
+ unMonitPs = True
continue
if checkProcessStatus( properties) != StatusCodes.RUNNING:
- printd( "\n Process %s is not Running"%process)
+ printd( "\n Service %s is not Running"%process)
#add this process into unmonit list
- printd ("updating the process for unmonit %s\n" %process)
+ printd ("updating the service for unmonit %s\n" %process)
umonit_update[process]=csec
-
#if dict is not empty write to file else delete it
if not is_emtpy(umonit_update):
writePsListToUnmonitFile(umonit_update)
else:
if is_emtpy(umonit_update) and unMonitPs == False:
#delete file it is there
- if path.isfile(Config.UNMONIT_PS_FILE):
- printd("Removing the file %s" %Config.UNMONIT_PS_FILE)
- os.remove(Config.UNMONIT_PS_FILE)
+ removeFile(Config.UNMONIT_PS_FILE)
+
+def checkPsTimeStampForMonitor(csec,ts, process):
+ printd("Time difference=%s" %str(int(csec) - int(ts)))
+ tmin = (int(csec) - int(ts) )/60
+ if ( int(csec) - int(ts) )/60 < Config.MONIT_AFTER_MINS:
+ raisealert(Log.ALERT, "The %s get monitor after %s minutes " %(process, Config.MONIT_AFTER_MINS))
+ printd('process will be monitored after %s min' %(str(int(Config.MONIT_AFTER_MINS) - tmin)))
+ return False
+
+ return True
+
+def removeFile(fileName):
+ if path.isfile(fileName):
+ printd("Removing the file %s" %fileName)
+ os.remove(fileName)
def loadPsFromUnMonitFile():
@@ -358,18 +365,14 @@ def main():
'''
Step1 : Get Config
'''
-
printd("monitoring started")
temp_dict = getConfig()
-
'''
Step2: Monitor and Raise Alert
'''
- #raisealert(Log.INFO, 'Monit started')
monitProcess( temp_dict )
-
if __name__ == "__main__":
main()