You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by ja...@apache.org on 2013/11/26 05:36:09 UTC

git commit: updated refs/heads/4.3 to 8a29b70

Updated Branches:
  refs/heads/4.3 483f304ef -> 8a29b7000


Monitoring python script organized into more methods


Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/8a29b700
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/8a29b700
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/8a29b700

Branch: refs/heads/4.3
Commit: 8a29b7000c01d0516824d33074d1c42f64c78b9e
Parents: 483f304
Author: Jayapal <ja...@apache.org>
Authored: Tue Nov 26 09:49:22 2013 +0530
Committer: Jayapal <ja...@apache.org>
Committed: Tue Nov 26 09:49:22 2013 +0530

----------------------------------------------------------------------
 .../debian/config/root/monitorServices.py       | 159 ++++++++++---------
 1 file changed, 81 insertions(+), 78 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cloudstack/blob/8a29b700/systemvm/patches/debian/config/root/monitorServices.py
----------------------------------------------------------------------
diff --git a/systemvm/patches/debian/config/root/monitorServices.py b/systemvm/patches/debian/config/root/monitorServices.py
index 4e1b7e0..3646c81 100755
--- a/systemvm/patches/debian/config/root/monitorServices.py
+++ b/systemvm/patches/debian/config/root/monitorServices.py
@@ -135,6 +135,49 @@ def isPidMatchPidFile(pidfile, pids):
     fd.close()
     return StatusCodes.FAILED
 
+def checkProcessRunningStatus(process_name, pidFile):
+    printd("checking the process " + process_name)
+    cmd = ''
+    pids = []
+    cmd = 'pidof ' + process_name
+    printd(cmd)
+
+    #cmd = 'service ' + process_name + ' status'
+    pout = Popen(cmd, shell=True, stdout=PIPE)
+    exitStatus = pout.wait()
+    temp_out = pout.communicate()[0]
+
+    #check there is only one pid or not
+    if exitStatus == 0:
+        pids = temp_out.split(' ')
+        printd("pid(s) of process %s are %s " %(process_name, pids))
+
+        #there is more than one process so match the pid file
+        #if not matched set pidFileMatched=False
+        printd("Checking pid file")
+        if isPidMatchPidFile(pidFile, pids) == StatusCodes.SUCCESS:
+            return True,pids;
+
+    printd("pid of exit status %s" %exitStatus)
+
+    return False,pids;
+
+def restartService(service_name):
+
+    cmd = 'service ' + service_name + ' restart'
+    cout = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
+    return_val = cout.wait()
+
+    if return_val == 0:
+        printd("The service " + service_name +" recovered successfully ")
+        msg="The process " +service_name+" is recovered successfully "
+        raisealert(Log.INFO,msg,service_name)
+        return True
+    else:
+        printd("process restart failed ....")
+
+    return False
+
 
 
 def checkProcessStatus( process ):
@@ -152,56 +195,28 @@ def checkProcessStatus( process ):
     if process_name is None:
         printd ("\n Invalid Process Name")
         return StatusCodes.INVALID_INP
-    else:
-        printd("checking the process " + process_name)
-        cmd = 'pidof ' + process_name
-        printd(cmd)
-        #cmd = 'service ' + process_name + ' status'
-        pout = Popen(cmd, shell=True, stdout=PIPE)
-        exitStatus = pout.wait()
-        temp_out = pout.communicate()[0]
-
-    #check there is only one pid or not
-    if exitStatus == 0:
-        pids = temp_out.split(' ')
-        msg="pids: " +temp_out;
-        printd(msg)
 
-        #there is more than one process so match the pid file
-        #if not matched set pidFileMatched=False
-        printd("Checking pid file")
-        if isPidMatchPidFile(pidfile, pids) == StatusCodes.SUCCESS:
-            pidFileMatched = True;
-        else:
-            pidFileMatched = False;
+    status, pids = checkProcessRunningStatus(process_name, pidfile)
 
-    if exitStatus == 0 and pidFileMatched == True:
+    if status == True:
         printd("The process is running ....")
         return  StatusCodes.RUNNING
     else:
-        printd('exit status:'+str(exitStatus))
-        msg="The process " + process_name +" is not running trying recover "
-        printd(msg)
+        printd("Process %s is not running trying to recover" %process_name)
         #Retry the process state for few seconds
+
         for i in range(1, Config.RETRY_ITERATIONS):
-            pout = Popen(cmd, shell=True, stdout=PIPE)
-            exitStatus = pout.wait()
-            temp_out = pout.communicate()[0]
+            time.sleep(Config.SLEEP_SEC)
 
             if i < Config.RETRY_FOR_RESTART: # this is just for trying few more times
-                if exitStatus == 0:
-                    pids = temp_out.split(' ')
-
-                    if isPidMatchPidFile(pidfile, pids) == StatusCodes.SUCCESS:
-                        pidFileMatched = True;
-                        printd("pid file is matched ...")
-                        raisealert(Log.ALERT, "The process detected as running", process_name)
-                        break
-                    else:
-                        printd("pid file is not matched ...")
-                        pidFileMatched = False;
-                        time.sleep(Config.SLEEP_SEC)
-                        continue
+
+                status, pids = checkProcessRunningStatus(process_name, pidfile)
+                if status == True:
+                    raisealert(Log.ALERT, "The process detected as running", process_name)
+                    break
+                else:
+                    printd("Process %s is not running checking the status again..." %process_name)
+                    continue
             else:
                 msg="The process " +process_name+" is not running trying recover "
                 raisealert(Log.INFO,process_name,msg)
@@ -213,25 +228,10 @@ def checkProcessStatus( process ):
                         printd(cmd)
                         Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
 
-                cmd = 'service ' + service_name + ' restart'
-
-                time.sleep(Config.SLEEP_SEC)
-                #return_val= check_call(cmd , shell=True)
-
-                cout = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
-                return_val = cout.wait()
-
-                if return_val == 0:
-                    printd("The process" + process_name +" recovered successfully ")
-                    msg="The process " +process_name+" is recovered successfully "
-                    raisealert(Log.INFO,msg,process_name)
-
-                    break;
+                if restartService(service_name) == True:
+                    break
                 else:
-                    #retry restarting the process for few tries
-                    printd("process restart failing trying again ....")
-                    restartFailed=True
-                    time.sleep(Config.SLEEP_SEC)
+                    restartFailed = True
                     continue
         #for end here
 
@@ -255,6 +255,7 @@ def monitProcess( processes_info ):
 
     dict_unmonit={}
     umonit_update={}
+    unMonitPs=False
 
     if not path.isfile(Config.UNMONIT_PS_FILE):
         printd('Unmonit File not exist')
@@ -265,42 +266,48 @@ def monitProcess( processes_info ):
     #time for noting process down time
     csec = repr(time.time()).split('.')[0]
 
-    unMonitPs=False
-
     for process,properties in processes_info.items():
         #skip the process it its time stamp less than Config.MONIT_AFTER_MINS
-        printd ("checking the process %s \n" %process)
+        printd ("checking the service %s \n" %process)
 
         if not is_emtpy(dict_unmonit):
             if dict_unmonit.has_key(process):
                 ts = dict_unmonit[process]
-                printd("Time difference=%s" %str(int(csec) - int(ts)))
-                tmin = (int(csec) - int(ts) )/60
 
-                if ( int(csec) - int(ts) )/60 < Config.MONIT_AFTER_MINS:
-                    raisealert(Log.ALERT, "The %s get monitor after %s minutes " %(process, Config.MONIT_AFTER_MINS))
-                    printd('process will be monitored after %s min' %(str(int(Config.MONIT_AFTER_MINS) - tmin)))
-                    unMonitPs=True
+                if checkPsTimeStampForMonitor (csec, ts, properties) == False:
+                    unMonitPs = True
                     continue
 
         if checkProcessStatus( properties) != StatusCodes.RUNNING:
-            printd( "\n Process %s is not Running"%process)
+            printd( "\n Service %s is not Running"%process)
             #add this process into unmonit list
-            printd ("updating the process for unmonit %s\n" %process)
+            printd ("updating the service for unmonit %s\n" %process)
             umonit_update[process]=csec
 
-
     #if dict is not empty write to file else delete it
     if not is_emtpy(umonit_update):
         writePsListToUnmonitFile(umonit_update)
     else:
         if is_emtpy(umonit_update) and unMonitPs == False:
             #delete file it is there
-            if path.isfile(Config.UNMONIT_PS_FILE):
-                printd("Removing the file %s" %Config.UNMONIT_PS_FILE)
-                os.remove(Config.UNMONIT_PS_FILE)
+            removeFile(Config.UNMONIT_PS_FILE)
+
 
+def checkPsTimeStampForMonitor(csec,ts, process):
+    printd("Time difference=%s" %str(int(csec) - int(ts)))
+    tmin = (int(csec) - int(ts) )/60
 
+    if ( int(csec) - int(ts) )/60 < Config.MONIT_AFTER_MINS:
+        raisealert(Log.ALERT, "The %s get monitor after %s minutes " %(process, Config.MONIT_AFTER_MINS))
+        printd('process will be monitored after %s min' %(str(int(Config.MONIT_AFTER_MINS) - tmin)))
+        return False
+
+    return  True
+
+def removeFile(fileName):
+    if path.isfile(fileName):
+        printd("Removing the file %s" %fileName)
+        os.remove(fileName)
 
 def loadPsFromUnMonitFile():
 
@@ -358,18 +365,14 @@ def main():
     '''
     Step1 : Get Config
     '''
-
     printd("monitoring started")
     temp_dict  = getConfig()
 
-
     '''
     Step2: Monitor and Raise Alert
     '''
-    #raisealert(Log.INFO, 'Monit started')
     monitProcess( temp_dict )
 
-
 if __name__ == "__main__":
     main()