You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by ro...@apache.org on 2018/03/15 11:02:21 UTC

[cloudstack] branch 4.11 updated: CLOUDSTACK-10296: Find time different from last timestamp (#2458)

This is an automated email from the ASF dual-hosted git repository.

rohit pushed a commit to branch 4.11
in repository https://gitbox.apache.org/repos/asf/cloudstack.git


The following commit(s) were added to refs/heads/4.11 by this push:
     new ab0bce2  CLOUDSTACK-10296: Find time different from last timestamp (#2458)
ab0bce2 is described below

commit ab0bce2a1baa4e199a447e3f2d74ca93c026a037
Author: Rohit Yadav <ro...@apache.org>
AuthorDate: Thu Mar 15 16:32:18 2018 +0530

    CLOUDSTACK-10296: Find time different from last timestamp (#2458)
    
    This fixes a time-difference calculation issue in the rVR heartbeat
    check script, raised recently on dev@.
    Reduce logging to avoid filling the ramdisk with log output
    Make checkrouter return fault state when keepalived is not running
    
    Signed-off-by: Rohit Yadav <ro...@shapeblue.com>
---
 systemvm/debian/opt/cloud/bin/checkrouter.sh       |  7 ++
 .../opt/cloud/templates/check_heartbeat.sh.templ   | 78 ++++++++++++----------
 2 files changed, 48 insertions(+), 37 deletions(-)

diff --git a/systemvm/debian/opt/cloud/bin/checkrouter.sh b/systemvm/debian/opt/cloud/bin/checkrouter.sh
index 0a9041b..bb6c9f8 100755
--- a/systemvm/debian/opt/cloud/bin/checkrouter.sh
+++ b/systemvm/debian/opt/cloud/bin/checkrouter.sh
@@ -17,6 +17,13 @@
 # under the License.
 
 STATUS=UNKNOWN
+
+if [ "$(systemctl is-active keepalived)" != "active" ]
+then
+    echo "Status: FAULT"
+    exit
+fi
+
 ROUTER_TYPE=$(cat /etc/cloudstack/cmdline.json | grep type | awk '{print $2;}' | sed -e 's/[,\"]//g')
 if [ "$ROUTER_TYPE" = "router" ]
 then
diff --git a/systemvm/debian/opt/cloud/templates/check_heartbeat.sh.templ b/systemvm/debian/opt/cloud/templates/check_heartbeat.sh.templ
index 2ab9aba..62a2b18 100755
--- a/systemvm/debian/opt/cloud/templates/check_heartbeat.sh.templ
+++ b/systemvm/debian/opt/cloud/templates/check_heartbeat.sh.templ
@@ -16,48 +16,52 @@
 # specific language governing permissions and limitations
 # under the License.
 
-ROUTER_BIN_PATH=/ramdisk/rrouter
-ROUTER_LOG=${ROUTER_BIN_PATH}/keepalived.log
+ROUTER_BIN_PATH="/ramdisk/rrouter"
+ROUTER_LOG="${ROUTER_BIN_PATH}/keepalived.log"
 STRIKE_FILE="$ROUTER_BIN_PATH/keepalived.strikes"
+TS_FILE="$ROUTER_BIN_PATH/keepalived.ts"
+CT_FILE="$ROUTER_BIN_PATH/keepalived.ct"
 
-if [ -e $ROUTER_BIN_PATH/keepalived.ts2 ]
+checktime=$(date +%s)
+hbtime=$(cat $TS_FILE)
+diff=$(($checktime - $hbtime))
+
+lastcheck=0
+if [ -e $CT_FILE ]
+then
+    lastcheck=$(cat $CT_FILE 2>/dev/null)
+fi
+checkdiff=$(($checktime - $lastcheck))
+if [ $checkdiff -ge 0 ] && [ $checkdiff -lt 30 ]
+then
+    exit
+fi
+echo $checktime > $CT_FILE
+
+s=0
+if [ $diff -gt 10 ]
 then
-    thistime=$(cat $ROUTER_BIN_PATH/keepalived.ts)
-    lasttime=$(cat $ROUTER_BIN_PATH/keepalived.ts2)
-    diff=$(($lasttime - $thistime))
-    s=0
-    if [ $diff -ge 10 ]
+    if [ -e $STRIKE_FILE ]
     then
-        if [ -e $STRIKE_FILE ]
-        then
-            s=`cat $STRIKE_FILE 2>/dev/null`
-        fi
-        s=$(($s+1))
-        echo $s > $STRIKE_FILE
-    else
-        if [ -e $STRIKE_FILE ]
-        then
-            rm $STRIKE_FILE
-            echo keepalived.strikes file was removed! >> $ROUTER_LOG
-        else
-            echo keepalived.strikes file does not exist! >> $ROUTER_LOG
-        fi
+        s=$(cat $STRIKE_FILE 2>/dev/null)
     fi
-    #3 strikes rule
-    if [ $s -gt 2 ]
-    then
-        echo Keepalived process is dead! >> $ROUTER_LOG
-        systemctl stop keepalived >> $ROUTER_LOG 2>&1
-        systemctl stop conntrackd >> $ROUTER_LOG 2>&1
+    s=$(($s+1))
+    echo $s > $STRIKE_FILE
+    echo "Check time: $checktime, last heartbeat time: $hbtime, time diff: $diff, strike count: $s" >> $ROUTER_LOG
+else
+    rm -f $STRIKE_FILE
+fi
+
+if [ $s -gt 3 ]
+then
+    systemctl stop --now keepalived >> $ROUTER_LOG 2>&1
+    systemctl stop --now conntrackd >> $ROUTER_LOG 2>&1
 
-        #Set fault so we have the same effect as a KeepaliveD fault.
-        python /opt/cloud/bin/master.py --fault
+    #Set fault so we have the same effect as a KeepaliveD fault.
+    python /opt/cloud/bin/master.py --fault
 
-        pkill -9 keepalived >> $ROUTER_LOG 2>&1
-        pkill -9 conntrackd >> $ROUTER_LOG 2>&1
-        echo Status: FAULT \(keepalived process is dead\) >> $ROUTER_LOG
-        exit
-    fi
+    pkill -9 keepalived >> $ROUTER_LOG 2>&1 || true
+    pkill -9 conntrackd >> $ROUTER_LOG 2>&1 || true
+    echo Status: FAULT \(keepalived process is dead\) >> $ROUTER_LOG
+    exit
 fi
-
-cp $ROUTER_BIN_PATH/keepalived.ts $ROUTER_BIN_PATH/keepalived.ts2

-- 
To stop receiving notification emails like this one, please contact
rohit@apache.org.