You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cloudstack.apache.org by bh...@apache.org on 2015/03/17 11:26:51 UTC

[49/50] git commit: updated refs/heads/master to 3c429ee

If master gets called and then, within a few seconds, backup gets called (this can happen during provisioning), the master process will complete after the backup one. That is not what we want: these commands must run serially.

Used a wee Unix socket (bound to an abstract-namespace address) to create a lock.

We still need to find out why the flip-flop occasionally happens; the nopreempt setting should stop this ...


Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/23c100d9
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/23c100d9
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/23c100d9

Branch: refs/heads/master
Commit: 23c100d949f8d57e27e199a4401fb8087ec44e3e
Parents: 7bfccd6
Author: Ian Southam <is...@schubergphilis.com>
Authored: Mon Mar 9 16:40:18 2015 +0100
Committer: wilderrodrigues <wr...@schubergphilis.com>
Committed: Mon Mar 16 11:40:12 2015 +0100

----------------------------------------------------------------------
 .../config/opt/cloud/bin/cs/CsRedundant.py      | 29 ++++++++++++++++++--
 1 file changed, 26 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cloudstack/blob/23c100d9/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
----------------------------------------------------------------------
diff --git a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
index a2a1793..3e78f4c 100755
--- a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
+++ b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py
@@ -40,6 +40,8 @@ from CsFile import CsFile
 from CsConfig import CsConfig
 from CsProcess import CsProcess
 from CsApp import CsPasswdSvc
+import socket
+from time import sleep
 
 
 class CsRedundant(object):
@@ -157,11 +159,31 @@ class CsRedundant(object):
         if not proc.find():
             CsHelper.service("keepalived", "restart")
 
+    def set_lock(self):
+        """
+        Make sure that master state changes happen sequentially
+        """
+        iterations = 10
+        time_between = 1
+
+        for iter in range(0, iterations):
+            try:
+                s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+                s.bind( '\0master_lock')
+                return s
+            except socket.error, e:
+                error_code = e.args[0]
+                error_string = e.args[1]
+                print "Process already running (%d:%s ). Exiting" % ( error_code, error_string)
+                logging.info("Master is already running, waiting")
+                sleep(1)
+
     def set_fault(self):
         """ Set fault mode on this router """
         if not self.cl.is_redundant():
             logging.error("Set fault called on non-redundant router")
             return
+        s = self.set_lock()
         logging.info("Router switched to fault mode")
         ads = [o for o in self.address.get_ips() if o.is_public()]
         for o in ads:
@@ -188,7 +210,8 @@ class CsRedundant(object):
             logging.error("Set backup called on node that is already backup")
             return
         """
-        logging.info("Router switched to backup mode")
+        s = self.set_lock()
+        logging.debug("Setting router to backup")
         ads = [o for o in self.address.get_ips() if o.is_public()]
         for o in ads:
             CsHelper.execute("ifconfig %s down" % o.get_device())
@@ -202,7 +225,6 @@ class CsRedundant(object):
         CsHelper.service("dnsmasq", "stop")
         # self._set_priority(self.CS_PRIO_DOWN)
         self.cl.set_master_state(False)
-        # CsHelper.service("keepalived", "restart")
         self.cl.save()
         logging.info("Router switched to backup mode")
 
@@ -216,6 +238,8 @@ class CsRedundant(object):
             logging.error("Set master called on master node")
             return
         """
+        s = self.set_lock()
+        logging.debug("Setting router to master")
         ads = [o for o in self.address.get_ips() if o.is_public()]
         for o in ads:
             # cmd2 = "ip link set %s up" % self.getDevice()
@@ -237,7 +261,6 @@ class CsRedundant(object):
         CsHelper.service("dnsmasq", "restart")
         self.cl.set_master_state(True)
         self.cl.save()
-        # CsHelper.service("keepalived", "restart")
         logging.info("Router switched to master mode")
 
     def _collect_ignore_ips(self):