You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by rl...@apache.org on 2016/07/11 05:19:59 UTC

incubator-hawq git commit: HAWQ-901 Add retries to standby master start check

Repository: incubator-hawq
Updated Branches:
  refs/heads/master e3ea4896b -> c5a3f42fd


HAWQ-901 Add retries to standby master start check


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/c5a3f42f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/c5a3f42f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/c5a3f42f

Branch: refs/heads/master
Commit: c5a3f42fdbc98715294dd2add72c79611814398a
Parents: e3ea489
Author: rlei <rl...@pivotal.io>
Authored: Mon Jul 11 10:22:29 2016 +0800
Committer: rlei <rl...@pivotal.io>
Committed: Mon Jul 11 13:18:46 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawq_ctl             |  2 +-
 tools/sbin/hawqstandbywatch.py | 22 ++++++++++++++++------
 2 files changed, 17 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c5a3f42f/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index 50070f6..211f599 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -638,7 +638,7 @@ class HawqStart:
         cmd = self._start_standby_cmd()
         check_return_code(remote_ssh(cmd, self.standby_host_name, self.user))
         cmd = "%s; %s/sbin/hawqstandbywatch.py %s debug" % (source_hawq_env, self.GPHOME, self.master_data_directory)
-        result = remote_ssh(cmd, self.standby_host_name, self.user)
+        result = remote_ssh_nowait(cmd, self.standby_host_name, self.user)
         return result
 
     def _check_standby_sync(self):

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c5a3f42f/tools/sbin/hawqstandbywatch.py
----------------------------------------------------------------------
diff --git a/tools/sbin/hawqstandbywatch.py b/tools/sbin/hawqstandbywatch.py
index 82cf699..ca7ad1d 100755
--- a/tools/sbin/hawqstandbywatch.py
+++ b/tools/sbin/hawqstandbywatch.py
@@ -102,7 +102,7 @@ class SyncmasterWatcher:
 
         self.handles         = {}
         self.maxlines        = 1000
-        self.timelimit       = 5
+        self.timelimit       = 3
         self.delay           = 0.1
 
 
@@ -188,10 +188,20 @@ class SyncmasterWatcher:
                 break
 
         logger.info("checking if syncmaster is running")
-        pid = gp.getSyncmasterPID('localhost', self.datadir)
-        if not pid > 0:
-            logger.warning("syncmaster not running")
-            return 1
+        count = 0
+        counter = 20
+        while True:
+            pid = gp.getSyncmasterPID('localhost', self.datadir)
+            if not pid > 0:
+                if count >= counter:
+                    logger.error("Standby master start timeout")
+                    return 1
+                else:
+                    logger.warning("syncmaster not running, waiting...")
+            else:
+                break
+            count += 1
+            time.sleep(3)
 
         # syncmaster is running and there are no obvious errors in the log
         logger.info("syncmaster appears ok, pid %s" % pid)
@@ -219,7 +229,7 @@ if __name__ == '__main__':
 
     # watch syncmaster logs
     if len(sys.argv) > 2 and sys.argv[2] == 'debug':
-        print "Checking standby master status"
+        logger.info("Checking standby master status")
     watcher = SyncmasterWatcher( sys.argv[1] )
     rc = watcher.monitor_logs()
     watcher.close()