You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by rl...@apache.org on 2016/07/11 05:19:59 UTC
incubator-hawq git commit: HAWQ-901 Add retries to standby master start check
Repository: incubator-hawq
Updated Branches:
refs/heads/master e3ea4896b -> c5a3f42fd
HAWQ-901 Add retries to standby master start check
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/c5a3f42f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/c5a3f42f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/c5a3f42f
Branch: refs/heads/master
Commit: c5a3f42fdbc98715294dd2add72c79611814398a
Parents: e3ea489
Author: rlei <rl...@pivotal.io>
Authored: Mon Jul 11 10:22:29 2016 +0800
Committer: rlei <rl...@pivotal.io>
Committed: Mon Jul 11 13:18:46 2016 +0800
----------------------------------------------------------------------
tools/bin/hawq_ctl | 2 +-
tools/sbin/hawqstandbywatch.py | 22 ++++++++++++++++------
2 files changed, 17 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c5a3f42f/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index 50070f6..211f599 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -638,7 +638,7 @@ class HawqStart:
cmd = self._start_standby_cmd()
check_return_code(remote_ssh(cmd, self.standby_host_name, self.user))
cmd = "%s; %s/sbin/hawqstandbywatch.py %s debug" % (source_hawq_env, self.GPHOME, self.master_data_directory)
- result = remote_ssh(cmd, self.standby_host_name, self.user)
+ result = remote_ssh_nowait(cmd, self.standby_host_name, self.user)
return result
def _check_standby_sync(self):
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c5a3f42f/tools/sbin/hawqstandbywatch.py
----------------------------------------------------------------------
diff --git a/tools/sbin/hawqstandbywatch.py b/tools/sbin/hawqstandbywatch.py
index 82cf699..ca7ad1d 100755
--- a/tools/sbin/hawqstandbywatch.py
+++ b/tools/sbin/hawqstandbywatch.py
@@ -102,7 +102,7 @@ class SyncmasterWatcher:
self.handles = {}
self.maxlines = 1000
- self.timelimit = 5
+ self.timelimit = 3
self.delay = 0.1
@@ -188,10 +188,20 @@ class SyncmasterWatcher:
break
logger.info("checking if syncmaster is running")
- pid = gp.getSyncmasterPID('localhost', self.datadir)
- if not pid > 0:
- logger.warning("syncmaster not running")
- return 1
+ count = 0
+ counter = 20
+ while True:
+ pid = gp.getSyncmasterPID('localhost', self.datadir)
+ if not pid > 0:
+ if count >= counter:
+ logger.error("Standby master start timeout")
+ return 1
+ else:
+ logger.warning("syncmaster not running, waiting...")
+ else:
+ break
+ count += 1
+ time.sleep(3)
# syncmaster is running and there are no obvious errors in the log
logger.info("syncmaster appears ok, pid %s" % pid)
@@ -219,7 +229,7 @@ if __name__ == '__main__':
# watch syncmaster logs
if len(sys.argv) > 2 and sys.argv[2] == 'debug':
- print "Checking standby master status"
+ logger.info("Checking standby master status")
watcher = SyncmasterWatcher( sys.argv[1] )
rc = watcher.monitor_logs()
watcher.close()