You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by rl...@apache.org on 2015/11/13 04:56:05 UTC
[1/2] incubator-hawq git commit: HAWQ-40. Refactor hawq standby init.
Change standby start sequence and do init directly on standby host.
Repository: incubator-hawq
Updated Branches:
refs/heads/master 8c633bf53 -> 2561ab74d
HAWQ-40. Refactor hawq standby init. Change standby start sequence and do init directly on standby host.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/d0c9c1de
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/d0c9c1de
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/d0c9c1de
Branch: refs/heads/master
Commit: d0c9c1de4ab4eb8d5a6fe44345cda28a70486c81
Parents: 8c633bf
Author: rlei <rl...@pivotal.io>
Authored: Fri Nov 13 11:08:56 2015 +0800
Committer: stanlyxiang <st...@gmail.com>
Committed: Fri Nov 13 11:55:26 2015 +0800
----------------------------------------------------------------------
tools/bin/hawq_ctl | 157 +++++++++-------
tools/bin/hawqpylib/hawqlib.py | 6 +-
tools/bin/lib/hawq_bash_functions.sh | 132 +++++++++++++
tools/bin/lib/hawqinit.sh | 301 +++++++++++++++++-------------
4 files changed, 404 insertions(+), 192 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d0c9c1de/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index 880451a..4890cd4 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -66,7 +66,7 @@ class HawqInit:
if item in self.hawq_dict:
logger.info("Check: %s is set" % item)
else:
- sys.exit("Check: %s not configured in hawq-site.xml." % item)
+ sys.exit("Check: %s not configured in hawq-site.xml" % item)
self.master_host_name = self.hawq_dict['hawq_master_address_host']
self.master_port = self.hawq_dict['hawq_master_address_port']
@@ -88,9 +88,13 @@ class HawqInit:
self.standby_host_name = self.hawq_dict['hawq_standby_address_host']
self.standby_port = self.master_port
self.standby_address = self.standby_host_name + ":" + self.standby_port
+ if self.standby_host_name in (self.master_host_name, 'localhost', '127.0.0.1'):
+ logger.error("Standby host should not be the same as master host")
+ sys.exit(1)
else:
logger.info("No standby host configured, skip it")
self.standby_host_name = ''
+
if 'enable_secure_filesystem' in self.hawq_dict:
self.enable_secure_filesystem=self.hawq_dict['enable_secure_filesystem']
self.krb_server_keyfile =self.hawq_dict['krb_server_keyfile']
@@ -131,16 +135,16 @@ class HawqInit:
def check_hdfs_path(self):
cmd = "%s/bin/gpcheckhdfs hdfs %s %s %s" % \
(self.GPHOME, self.dfs_url, self.enable_secure_filesystem, self.krb_server_keyfile)
- logger.info("Check if hdfs path is available.")
+ logger.info("Check if hdfs path is available")
logger.debug("Check hdfs: %s" % cmd)
- check_return_code(local_ssh(cmd, logger, warning = True), logger, "Check hdfs failed, please verify your hdfs settings.")
+ check_return_code(local_ssh(cmd, logger, warning = True), logger, "Check hdfs failed, please verify your hdfs settings")
def set_total_vsegment_num(self):
cmd = "%s; hawq config -c default_segment_num -v %s --skipvalidation -q > /dev/null" % \
(source_hawq_env, self.total_vseg_num)
result = local_ssh(cmd, logger)
if result != 0:
- logger.warn("Set default_segment_num failed.")
+ logger.warn("Set default_segment_num failed")
return result
def set_vsegment_num_per_node(self):
@@ -149,7 +153,7 @@ class HawqInit:
(source_hawq_env, self.vseg_num_per_node)
result = local_ssh(cmd, logger)
if result != 0:
- logger.warn("Set hawq_resourcemanager_query_vsegment_number_per_segment_limit failed.")
+ logger.warn("Set hawq_resourcemanager_query_vsegment_number_per_segment_limit failed")
return result
def _get_master_init_cmd(self):
@@ -163,7 +167,7 @@ class HawqInit:
return cmd
def hawq_remove_standby(self):
- """Removes the standby master."""
+ """Removes the standby master"""
running_standby_host = ''
try:
@@ -173,7 +177,7 @@ class HawqInit:
rows = dbconn.execSQL(conn, query)
conn.close()
except DatabaseError, ex:
- logger.error("Failed to connect to database, this script can only be run when the database is up.")
+ logger.error("Failed to connect to database, this script can only be run when the database is up")
sys.exit(1)
for row in rows:
@@ -183,12 +187,14 @@ class HawqInit:
if running_standby_host:
logger.info("running standby host is %s" % running_standby_host)
signal.signal(signal.SIGINT,signal.SIG_IGN)
- cmd = "%s; hawq stop cluster -a -q" % source_hawq_env
- logger.info("Stop HAWQ cluster.")
- check_return_code(local_ssh(cmd, logger), logger, "Stop HAWQ cluster failed, exit.")
- logger.info("Start HAWQ master.")
+ logger.info("Stop HAWQ cluster")
+ cmd = "%s; hawq stop master -a -q" % source_hawq_env
+ check_return_code(local_ssh(cmd, logger), logger, "Stop HAWQ master failed, exit")
+ cmd = "%s; hawq stop allsegments -a -q" % source_hawq_env
+ check_return_code(local_ssh(cmd, logger), logger, "Stop HAWQ segments failed, exit")
+ logger.info("Start HAWQ master")
cmd = "%s; hawq start master -m -q" % source_hawq_env
- check_return_code(local_ssh(cmd, logger), logger, "Start HAWQ master failed, exit.")
+ check_return_code(local_ssh(cmd, logger), logger, "Start HAWQ master failed, exit")
try:
logger.info('Remove standby from Database catalog.')
@@ -201,15 +207,15 @@ class HawqInit:
#conn.close()
cmd = 'env PGOPTIONS="-c gp_session_role=utility" %s/bin/psql -p %s -d template1 -c \
"select gp_remove_master_standby();"' % (self.GPHOME, self.master_port)
- local_ssh(cmd, logger)
- logger.info('Database catalog updated successfully.')
- logger.info("Stop HAWQ master.")
+ check_return_code(local_ssh(cmd, logger), logger, \
+ "Update catalog failed, exit", "Catalog updated successfully.")
+ logger.info("Stop HAWQ master")
cmd = "%s; hawq stop master -a" % source_hawq_env
- check_return_code(local_ssh(cmd, logger), logger, "Stop hawq master failed, exit.")
+ check_return_code(local_ssh(cmd, logger), logger, "Stop hawq master failed, exit")
except DatabaseError, ex:
- logger.error("Failed to connect to database, this script can only be run when the database is up.")
+ logger.error("Failed to connect to database, this script can only be run when the database is up")
cmd = "%s; hawq stop master -a" % source_hawq_env
- check_return_code(local_ssh(cmd, logger), logger, "Stop hawq master failed, exit.")
+ check_return_code(local_ssh(cmd, logger), logger, "Stop hawq master failed, exit")
remove_property_xml("hawq_standby_address_host", "%s/etc/hawq-site.xml" % self.GPHOME)
host_list = parse_hosts_file(self.GPHOME)
sync_hawq_site(self.GPHOME, host_list)
@@ -226,24 +232,42 @@ class HawqInit:
logger.debug("rm -rf %s %s" % (self.master_data_directory, tmp_dir_list))
cmd = "rm -rf %s %s" % (self.master_data_directory, tmp_dir_list)
check_return_code(remote_ssh(cmd, self.standby_host_name, self.user), logger, \
- "Delete standby master's directories failed, exit.")
+ "Delete standby master's directories failed, exit")
signal.signal(signal.SIGINT,signal.default_int_handler)
- logger.info('Remove standby master finished.')
+ logger.info('Remove standby master finished')
else:
- logger.info("Do not find a running standby master.")
+ logger.info("Do not find a running standby master")
+
+ def _init_standby(self):
+ logger.info("Start to init standby master: '%s'" % self.standby_host_name)
+ logger.info("This might take a couple of minutes, please wait...")
+ # Sync config files from master.
+ scpcmd = "scp %s/etc/_mgmt_config %s:%s/etc/_mgmt_config > /dev/null" % \
+ (self.GPHOME, self.standby_host_name, self.GPHOME)
+ check_return_code(remote_ssh(scpcmd, self.master_host_name, self.user), \
+ logger, "Sync _mgmt_config failed")
+ scpcmd = "scp %s/etc/slaves %s:%s/etc/slaves > /dev/null" % \
+ (self.GPHOME, self.standby_host_name, self.GPHOME)
+ check_return_code(remote_ssh(scpcmd, self.master_host_name, self.user), \
+ logger, "Sync slaves file failed")
+
+ standby_init_cmd = self._get_standby_init_cmd()
+
+ return check_return_code(remote_ssh_nowait(standby_init_cmd, self.standby_host_name, self.user))
+
def _resync_standby(self):
- logger.info("Re-sync standby.")
+ logger.info("Re-sync standby")
cmd = "%s; hawq stop cluster -a" % source_hawq_env
- check_return_code(local_ssh(cmd, logger), logger, "Stop hawq cluster failed, exit.")
+ check_return_code(local_ssh(cmd, logger), logger, "Stop hawq cluster failed, exit")
cmd = "cd %s; %s; %s/bin/lib/pysync.py -x gpperfmon/data -x pg_log -x db_dumps %s %s:%s" % \
(self.master_data_directory, source_hawq_env, self.GPHOME, self.master_data_directory,
self.standby_host_name, self.master_data_directory)
result = local_ssh(cmd, logger)
- check_return_code(result, logger, "Re-sync standby master failed, exit.")
+ check_return_code(result, logger, "Re-sync standby master failed, exit")
cmd = "%s; hawq start cluster -a" % source_hawq_env
result = local_ssh(cmd, logger)
- check_return_code(result, logger, "Start hawq cluster failed.")
+ check_return_code(result, logger, "Start hawq cluster failed")
return result
@@ -284,18 +308,10 @@ class HawqInit:
logger.info("Segments init successfully on nodes '%s'" % self.host_list)
if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
- logger.info("Start to init standby master: '%s'" % self.standby_host_name)
- logger.info("This might take a couple of minutes, please wait...")
- standby_init_cmd = self._get_standby_init_cmd()
- scpcmd = "scp %s/etc/_mgmt_config %s:%s/etc/_mgmt_config > /dev/null" % \
- (self.GPHOME, self.standby_host_name, self.GPHOME)
- local_ssh(scpcmd)
- scpcmd = "scp %s/etc/slaves %s:%s/etc/slaves > /dev/null" % \
- (self.GPHOME, self.standby_host_name, self.GPHOME)
- local_ssh(scpcmd)
- check_return_code(remote_ssh(standby_init_cmd, self.master_host_name, self.user), logger, \
- "Standby master init failed, exit", "Standby master init successfully")
- logger.info("HAWQ cluster init successfully")
+ check_return_code(self._init_standby(), logger, \
+ "Init standby failed, exit", \
+ "Init standby successfully")
+ logger.info("Init HAWQ cluster successfully")
return None
def run(self):
@@ -313,16 +329,18 @@ class HawqInit:
logger.info("Try to remove standby master")
self.hawq_remove_standby()
elif self.node_type == "standby":
- logger.info("Start to init standby master")
- logger.info("This might take couple minutes, please wait...")
+ if self.standby_host_name in ('', 'None', 'none', 'NONE'):
+ logger.info("No standby host found")
+ logger.info("Please check your standby host name")
+ sys.exit(1)
if self.no_update:
check_return_code(self._resync_standby(), logger, \
"Standby master re-sync failed, exit", \
"Standby master re-sync successfully")
else:
- cmd = self._get_standby_init_cmd()
- check_return_code(local_ssh(cmd, logger), logger, "Standby master init failed, exit", \
- "Standby master init successfully")
+ check_return_code(self._init_standby(), logger, \
+ "Init standby failed, exit", \
+ "Init standby successfully")
elif self.node_type == "segment":
cmd = self._get_segment_init_cmd()
@@ -333,7 +351,7 @@ class HawqInit:
self.check_hdfs_path()
self._init_cluster()
else:
- sys.exit('node_type should be in master/standby/segment/cluster')
+ sys.exit('hawq init object should be one of master/standby/segment/cluster')
return None
class HawqStart:
@@ -364,7 +382,7 @@ class HawqStart:
for item in check_items:
if item not in self.hawq_dict:
- logger.error("Check: %s not configured in hawq-site.xml." % item)
+ logger.error("Check: %s not configured in hawq-site.xml" % item)
sys.exit()
self.master_host_name = self.hawq_dict['hawq_master_address_host']
@@ -433,14 +451,16 @@ class HawqStart:
def _start_all_nodes(self):
logger.info("Start all the nodes in hawq cluster")
- logger.info("Starting master node '%s'" % self.master_host_name)
- check_return_code(self.start_master(), logger, "Master start failed, exit", \
- "Master started successfully")
if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
logger.info("Starting standby master '%s'" % self.standby_host_name)
check_return_code(self.start_standby(), logger, "Standby master start failed, exit",
"Standby master started successfully")
+
+ logger.info("Starting master node '%s'" % self.master_host_name)
+ check_return_code(self.start_master(), logger, "Master start failed, exit", \
+ "Master started successfully")
+
segment_cmd_str = self._start_segment_cmd()
logger.info("Start segments in list: %s" % self.host_list)
work_list = []
@@ -455,6 +475,7 @@ class HawqStart:
logger.error("Segments start failed")
else:
logger.info("Segments started successfully")
+ logger.info("HAWQ cluster started successfully")
return node_init.return_flag
def _start_all_segments(self):
@@ -531,7 +552,7 @@ class HawqStop:
for item in check_items:
if item not in self.hawq_dict:
- sys.exit("Check: %s not configured in hawq-site.xml." % item)
+ sys.exit("Check: %s not configured in hawq-site.xml" % item)
self.master_host_name = self.hawq_dict['hawq_master_address_host']
self.master_port = self.hawq_dict['hawq_master_address_port']
@@ -743,13 +764,21 @@ def remote_ssh(cmd_str, host, user, q=None):
result = subprocess.Popen(remote_cmd_str, shell=True, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
stdout,stderr = result.communicate()
if stdout and stdout != '':
- logger.info(stdout)
- if stderr and stdout != '':
- logger.info(stderr)
+ logger.info(stdout.strip())
+ if stderr and stderr != '':
+ logger.info(stderr.strip())
if q:
q.put(("done", host, result.returncode))
return result.returncode
+def remote_ssh_nowait(cmd, host, user):
+ if user == "":
+ remote_cmd_str = "ssh -o 'StrictHostKeyChecking no' %s \"%s\"" % (host, cmd)
+ else:
+ remote_cmd_str = "ssh -o 'StrictHostKeyChecking no' %s@%s \"%s\"" % (user, host, cmd)
+ result = subprocess.Popen(remote_cmd_str, shell=True).wait()
+ return result
+
def check_progress(q, total_num, action, quiet=False):
working_num = total_num
@@ -799,31 +828,31 @@ def hawq_activate_standby(opts, hawq_dict):
cmd = "%s; hawq stop cluster -a -M fast;" % source_hawq_env
result = local_ssh(cmd, logger)
if result != 0:
- logger.debug("Stop cluster failed, try to stop it immediately.")
+ logger.debug("Stop cluster failed, try to stop it immediately")
cmd = "%s; hawq stop cluster -a -M immediate;" % source_hawq_env
- check_return_code(local_ssh(cmd, logger), logger, "Stop cluster failed, exit.")
+ check_return_code(local_ssh(cmd, logger), logger, "Stop cluster failed, exit")
cmd = "%s; hawq config -c hawq_master_address_host -v %s --skipvalidation -q" % \
(source_hawq_env, hawq_dict['hawq_standby_address_host'])
- check_return_code(local_ssh(cmd, logger), logger, "Set hawq_master_address_host failed.")
+ check_return_code(local_ssh(cmd, logger), logger, "Set hawq_master_address_host failed")
cmd = "%s; hawq config -c hawq_standby_address_host -v %s --skipvalidation -q" % \
(source_hawq_env, 'none')
- check_return_code(local_ssh(cmd, logger), logger, "Set hawq_standby_address_host failed.")
+ check_return_code(local_ssh(cmd, logger), logger, "Set hawq_standby_address_host failed")
cmd = '''echo "gp_persistent_repair_global_sequence = true" >> %s/%s''' % (hawq_dict['hawq_master_directory'], 'postgresql.conf')
- check_return_code(local_ssh(cmd, logger), logger, "Set gp_persistent_repair_global_sequence = true failed.")
+ check_return_code(local_ssh(cmd, logger), logger, "Set gp_persistent_repair_global_sequence = true failed")
cmd = "%s; hawq start master" % source_hawq_env
- check_return_code(local_ssh(cmd, logger), logger, "Start master failed.")
+ check_return_code(local_ssh(cmd, logger), logger, "Start master failed")
cmd = "%s; env PGOPTIONS=\"-c gp_session_role=utility\" psql -p %s -d template1 -c \"select gp_remove_master_standby()\
where (select count(*) from gp_segment_configuration where role='s') = 1;\"" % (source_hawq_env, hawq_dict['hawq_master_address_port'])
result = local_ssh(cmd, logger)
cmd = "%s; hawq stop master -a" % source_hawq_env
- check_return_code(local_ssh(cmd, logger), logger, "Stop master failed.")
+ check_return_code(local_ssh(cmd, logger), logger, "Stop master failed")
cmd = "%s; hawq start cluster" % source_hawq_env
- check_return_code(local_ssh(cmd, logger), logger, "Start cluster failed.")
+ check_return_code(local_ssh(cmd, logger), logger, "Start cluster failed")
cmd = '''sed -i "/gp_persistent_repair_global_sequence/d" %s/%s''' % (hawq_dict['hawq_master_directory'], 'postgresql.conf')
check_return_code(local_ssh(cmd, logger))
return None
@@ -888,16 +917,16 @@ def create_parser():
dest="masteronly",
action="store_true",
default=False,
- help="Start hawq in utility mode.")
+ help="Start hawq in utility mode")
parser.add_option("-U", "--special-mode",
choices=['upgrade', 'maintenance'],
dest="special_mode",
- help="Start hawq in upgrade/maintenance mode.")
+ help="Start hawq in upgrade/maintenance mode")
parser.add_option("-R", "--restrict",
dest="restrict",
action="store_true",
default=False,
- help="Start hawq in restrict mode.")
+ help="Start hawq in restrict mode")
parser.add_option('-r', '--remove-standby', action='store_true',
dest='remove_standby', default=False,
help='Delete hawq standby master node.')
@@ -908,12 +937,12 @@ def create_parser():
type="int",
dest="virtual_seg_num",
default=8,
- help="Sets maximum number of virtual segments per node.")
+ help="Sets maximum number of virtual segments per node")
parser.add_option("--vsegment-number",
type="int",
dest="virtual_seg_num",
default=8,
- help="Sets maximum number of virtual segments per node.")
+ help="Sets maximum number of virtual segments per node")
parser.add_option("--locale",
dest="hawq_locale",
default="en_US.utf8",
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d0c9c1de/tools/bin/hawqpylib/hawqlib.py
----------------------------------------------------------------------
diff --git a/tools/bin/hawqpylib/hawqlib.py b/tools/bin/hawqpylib/hawqlib.py
index b5da22c..bf9168c 100755
--- a/tools/bin/hawqpylib/hawqlib.py
+++ b/tools/bin/hawqpylib/hawqlib.py
@@ -87,12 +87,12 @@ def local_ssh(cmd, logger = None, warning = False):
stdout,stderr = result.communicate()
if logger:
if stdout != '':
- logger.info(stdout)
+ logger.info(stdout.strip())
if stderr != '':
if not warning:
- logger.error(stderr)
+ logger.error(stderr.strip())
else:
- logger.warn(stderr)
+ logger.warn(stderr.strip())
return result.returncode
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d0c9c1de/tools/bin/lib/hawq_bash_functions.sh
----------------------------------------------------------------------
diff --git a/tools/bin/lib/hawq_bash_functions.sh b/tools/bin/lib/hawq_bash_functions.sh
new file mode 100755
index 0000000..f547c30
--- /dev/null
+++ b/tools/bin/lib/hawq_bash_functions.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+#Check that SHELL is /bin/bash
+if [ $SHELL != /bin/bash ] && [ `ls -al /bin/sh|grep -c bash` -ne 1 ];then
+ echo "[FATAL]:-Scripts must be run by a user account that has SHELL=/bin/bash"
+ if [ -f /bin/bash ];then
+ echo "[INFO]:-/bin/bash exists, please update user account shell"
+ else
+ echo "[WARN]:-/bin/bash does not exist, does bash need to be installed?"
+ fi
+ exit 2
+fi
+
+declare -a CMDPATH
+CMDPATH=(/usr/kerberos/bin /usr/sfw/bin /opt/sfw/bin /usr/local/bin /bin /usr/bin /sbin /usr/sbin /usr/ucb /sw/bin)
+
+findCmdInPath() {
+ cmdtofind=$1
+
+ if [ $cmdtofind = 'awk' ] && [ `uname` = SunOS ]; then
+ if [ -f "/usr/xpg4/bin/awk" ]; then
+ CMD=/usr/xpg4/bin/awk
+ echo $CMD
+ return
+ else
+ echo $cmdtofind
+ return "Problem in hawq_bash_functions, command '/usr/xpg4/bin/awk' not found. \
+ You will need to edit the script named hawq_bash_functions.sh to \
+ properly locate the needed commands for your platform."
+ fi
+ fi
+ for pathel in ${CMDPATH[@]}
+ do
+ CMD=$pathel/$cmdtofind
+ if [ x"$CMD" != x"" ] && [ -f $CMD ]; then
+ echo $CMD
+ return
+ fi
+ done
+ echo $cmdtofind
+ return "Problem in hawq_bash_functions, command '$cmdtofind' not found in COMMAND path. \
+ You will need to edit the script named hawq_bash_functions.sh to properly locate \
+ the needed commands for your platform."
+}
+
+AWK=`findCmdInPath awk`
+BASENAME=`findCmdInPath basename`
+CAT=`findCmdInPath cat`
+CLEAR=`findCmdInPath clear`
+CKSUM=`findCmdInPath cksum`
+CUT=`findCmdInPath cut`
+DATE=`findCmdInPath date`
+DD=`findCmdInPath dd`
+DIRNAME=`findCmdInPath dirname`
+DF=`findCmdInPath df`
+DU=`findCmdInPath du`
+ECHO=`findCmdInPath echo`
+EXPR=`findCmdInPath expr`
+FIND=`findCmdInPath find`
+TABECHO=$ECHO
+PROMPT="$ECHO"
+GREP=`findCmdInPath grep`
+GZIPCMD=`findCmdInPath gzip`
+EGREP=`findCmdInPath egrep`
+HEAD=`findCmdInPath head`
+HOSTNAME=`findCmdInPath hostname`
+IPCS=`findCmdInPath ipcs`
+IFCONFIG=`findCmdInPath ifconfig`
+KILL=`findCmdInPath kill`
+LESSCMD=`findCmdInPath less`
+LS=`findCmdInPath ls`
+LOCALE=`findCmdInPath locale`
+MV=`findCmdInPath mv`
+MORECMD=`findCmdInPath more`
+MKDIR=`findCmdInPath mkdir`
+MKFIFO=`findCmdInPath mkfifo`
+NETSTAT=`findCmdInPath netstat`
+PING=`findCmdInPath ping`
+PS=`findCmdInPath ps`
+PYTHON=${GPHOME}/ext/python/bin/python
+RM=`findCmdInPath rm`
+SCP=`findCmdInPath scp`
+SED=`findCmdInPath sed`
+SLEEP=`findCmdInPath sleep`
+SORT=`findCmdInPath sort`
+SPLIT=`findCmdInPath split`
+SSH=`findCmdInPath ssh`
+TAIL=`findCmdInPath tail`
+TAR=`findCmdInPath tar`
+TEE=`findCmdInPath tee`
+TOUCH=`findCmdInPath touch`
+TR=`findCmdInPath tr`
+WC=`findCmdInPath wc`
+WHICH=`findCmdInPath which`
+WHOAMI=`findCmdInPath whoami`
+ZCAT=`findCmdInPath zcat`
+
+CALL_HOST=`$HOSTNAME|$CUT -d. -f1`
+VERBOSE=0
+USER_NAME=`id|$AWK '{print $1}'|$CUT -d"(" -f2|$TR -d ')'`
+PROG_NAME=`echo $0 | $TR -d '-'`
+PROG_NAME=`$BASENAME $PROG_NAME`
+PROG_PIDNAME=`echo $$ $PROG_NAME | awk '{printf "%06d %s\n", $1, $2}'`
+LOG_FILE=/tmp/mylog
+#DEBUG_LEVEL=1
+
+LOG_MSG () {
+ EXIT_STATUS=0
+ TIME=`$DATE +%H":"%M":"%S`
+ CUR_DATE=`$DATE +%Y%m%d`
+ DISPLAY_TXT=0
+ #Check to see if we need to update value of EXIT_STATUS
+ if [ `$ECHO $1|$AWK -F"]" '{print $1}'|$TR -d '\133'|$GREP -c "WARN"` -eq 1 ];then
+ EXIT_STATUS=1
+ fi
+ if [ `$ECHO $1|$AWK -F"]" '{print $1}'|$TR -d '\133'|$GREP -c "FATAL"` -eq 1 ];then
+ EXIT_STATUS=2
+ fi
+ if [ `$ECHO $1|$AWK -F"]" '{print $1}'|$TR -d '\133'|$GREP -c "ERROR"` -eq 1 ];then
+ EXIT_STATUS=2
+ fi
+ if [ "$2" == "verbose" ] || [ "$2" == "VERBOSE" ] || [ "$2" == "v" ] || [ "$2" == "V" ]; then
+ VERBOSE=1
+ fi
+
+ if [ "$VERBOSE" == "1" ]; then
+ $ECHO "${CUR_DATE}:${TIME}:${PROG_PIDNAME}:${CALL_HOST}:${USER_NAME}-$1" | $TEE -a $LOG_FILE
+ else
+ $ECHO "${CUR_DATE}:${TIME}:${PROG_PIDNAME}:${CALL_HOST}:${USER_NAME}-$1" >> $LOG_FILE
+ fi
+}
+
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d0c9c1de/tools/bin/lib/hawqinit.sh
----------------------------------------------------------------------
diff --git a/tools/bin/lib/hawqinit.sh b/tools/bin/lib/hawqinit.sh
index 6a7e1d8..312f3c1 100755
--- a/tools/bin/lib/hawqinit.sh
+++ b/tools/bin/lib/hawqinit.sh
@@ -2,22 +2,25 @@
object_type=$1
GPHOME=$2
-
-current_time=`date '+%s'`
-mgmt_config_file=${GPHOME}/etc/_mgmt_config
+VERBOSE=0
+if [ "$3" == "1" ]; then
+ VERBOSE=1
+fi
+source ${GPHOME}/bin/lib/hawq_bash_functions.sh
+SOURCE_PATH="source ${GPHOME}/greenplum_path.sh"
+${SOURCE_PATH}
if [ -f /etc/redhat-release ]; then
- os_version=`cat /etc/redhat-release | awk '{print substr($7,0,1)}'`
+ os_version=`${CAT} /etc/redhat-release | ${AWK} '{print substr($7,0,1)}'`
else
os_version='other'
fi
-source ${GPHOME}/greenplum_path.sh
-
+mgmt_config_file=${GPHOME}/etc/_mgmt_config
if [ -f ${mgmt_config_file} ]; then
source ${mgmt_config_file} > /dev/null 2>&1
else
- echo "${mgmt_config_file} is not exist, exit"
+ ${ECHO} "${mgmt_config_file} is not exist, exit"
exit 1
fi
@@ -30,7 +33,7 @@ elif [ ${object_type} = "segment" ]; then
hawq_port=${segment_port}
tmp_dir_list=${hawq_segment_temp_directory//,/ }
else
- echo "Node object should be master/standby/segment"
+ ${ECHO} "hawq init object should be one of master/standby/segment"
exit 1
fi
master_max_connections=${max_connections}
@@ -38,19 +41,19 @@ segment_max_connections=${max_connections}
master_ip_address_all=""
standby_ip_address_all=""
if [ "${os_version}" = "7" ];then
- master_ip_address_all=`ssh ${master_host_name} "/sbin/ifconfig |grep -v '127.0.0' | grep 'inet '|awk '{print \\$2}'"`
+ master_ip_address_all=`${SSH} ${master_host_name} "${IFCONFIG} |${GREP} -v '127.0.0' | ${GREP} 'inet '|${AWK} '{print \\$2}'"`
if [ "${standby_host_name}" != "" ] && [ "${standby_host_name}" != "None" ] \
&& [ "${standby_host_name}" != "none" ] && [ "${standby_host_name}" != "NONE" ];then
- standby_ip_address_all=`ssh ${standby_host_name} "/sbin/ifconfig |grep -v '127.0.0' | grep 'inet '|awk '{print \\$2}'"`
+ standby_ip_address_all=`${SSH} ${standby_host_name} "${IFCONFIG} |${GREP} -v '127.0.0' | ${GREP} 'inet '|${AWK} '{print \\$2}'"`
fi
- segment_ip_address_all=`/sbin/ifconfig | grep -v '127.0.0' | awk '/inet addr/{print substr($2,6)}'`
+ segment_ip_address_all=`${IFCONFIG} | ${GREP} -v '127.0.0' | ${AWK} '/inet addr/{print substr($2,6)}'`
else
- master_ip_address_all=`ssh ${master_host_name} "/sbin/ifconfig |grep -v '127.0.0' |awk '/inet addr/{print substr(\\$2,6)}'"`
+ master_ip_address_all=`${SSH} ${master_host_name} "${IFCONFIG} |${GREP} -v '127.0.0' |${AWK} '/inet addr/{print substr(\\$2,6)}'"`
if [ "${standby_host_name}" != "" ] && [ "${standby_host_name}" != "None" ] \
&& [ "${standby_host_name}" != "none" ] && [ "${standby_host_name}" != "NONE" ];then
- standby_ip_address_all=`ssh ${standby_host_name} "/sbin/ifconfig |grep -v '127.0.0' |awk '/inet addr/{print substr(\\$2,6)}'"`
+ standby_ip_address_all=`${SSH} ${standby_host_name} "${IFCONFIG} |${GREP} -v '127.0.0' |${AWK} '/inet addr/{print substr(\\$2,6)}'"`
fi
- segment_ip_address_all=`/sbin/ifconfig | grep -v '127.0.0' | awk '/inet addr/{print substr($2,6)}'`
+ segment_ip_address_all=`${IFCONFIG} | ${GREP} -v '127.0.0' | ${AWK} '/inet addr/{print substr($2,6)}'`
fi
PG_HBA=pg_hba.conf
@@ -59,13 +62,15 @@ TMP_PG_HBA=/tmp/pg_hba_conf_master.$$
MASTER_LOG_FILE=${log_filename}
STANDBY_LOG_FILE=${log_filename}
SEGMENT_LOG_FILE=${log_filename}
+LOG_FILE=${log_filename}
PSQL=${GPHOME}/bin/psql
+PG_CTL=${GPHOME}/bin/pg_ctl
if [ "${log_dir}" = "None" ]; then
-log_dir=${HOME}/hawqAdminLogs
+ log_dir=${HOME}/hawqAdminLogs
fi
if [ ! -d ${log_dir} ]; then
- mkdir -p ${log_dir}
+ ${MKDIR} -p ${log_dir}
fi
if [ ! -f ${log_filename} ]; then
@@ -73,103 +78,104 @@ if [ ! -f ${log_filename} ]; then
fi
GET_CIDRADDR () {
- if [ `echo $1 | grep -c :` -gt 0 ]; then
- echo $1/128
+ if [ `${ECHO} $1 | ${GREP} -c :` -gt 0 ]; then
+ ${ECHO} $1/128
else
- echo $1/32
+ ${ECHO} $1/32
fi
}
LOAD_GP_TOOLKIT () {
- CUR_DATE=`date +%Y%m%d`
- FILE_TIME=`date +%H%M%S`
- echo "[INFO]:-Loading hawq_toolkit..." >> ${MASTER_LOG_FILE}
+ CUR_DATE=`${DATE} +%Y%m%d`
+ FILE_TIME=`${DATE} +%H%M%S`
+ TOOLKIT_FILE=/tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME
+ LOG_MSG "[INFO]:-Loading hawq_toolkit..." verbose
ROLNAME=`$PSQL -q -t -A -p ${hawq_port} -c "select rolname from pg_authid where oid=10" template1`
if [ x"$ROLNAME" == x"" ];then
- echo "[FATAL]:-Failed to retrieve rolname." | tee -a ${MASTER_LOG_FILE}
+ LOG_MSG "[FATAL]:-Failed to retrieve rolname." verbose
exit 1
fi
- if [ -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME ]; then
- rm -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME
+ if [ -f ${TOOLKIT_FILE} ]; then
+ ${RM} -f ${TOOLKIT_FILE}
fi
# We need SET SESSION AUTH here to load the toolkit
- echo "SET SESSION AUTHORIZATION $ROLNAME;" >> /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME 2>&1
+ ${ECHO} "SET SESSION AUTHORIZATION $ROLNAME;" >> ${TOOLKIT_FILE} 2>&1
RETVAL=$?
if [ $RETVAL -ne 0 ];then
- echo "[FATAL]:-Failed to create the hawq_toolkit sql file." | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "[FATAL]:-Failed to create the hawq_toolkit sql file." | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- cat $GPHOME/share/postgresql/gp_toolkit.sql >> /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME 2>&1
+ ${CAT} $GPHOME/share/postgresql/gp_toolkit.sql >> ${TOOLKIT_FILE} 2>&1
RETVAL=$?
if [ $RETVAL -ne 0 ];then
- echo "[FATAL]:-Failed to create the hawq_toolkit sql file." | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "[FATAL]:-Failed to create the hawq_toolkit sql file." | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- $PSQL -q -p ${hawq_port} -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME template1
+ $PSQL -q -p ${hawq_port} -f ${TOOLKIT_FILE} template1
RETVAL=$?
if [ $RETVAL -ne 0 ];then
- echo "[FATAL]:-Failed to create the hawq_toolkit schema." | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "[FATAL]:-Failed to create the hawq_toolkit schema." | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- $PSQL -q -p ${hawq_port} -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME postgres
+ $PSQL -q -p ${hawq_port} -f ${TOOLKIT_FILE} postgres
RETVAL=$?
if [ $RETVAL -ne 0 ];then
- echo "[FATAL]:-Failed to create the hawq_toolkit schema." | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "[FATAL]:-Failed to create the hawq_toolkit schema." | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- rm -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME
+ ${RM} -f ${TOOLKIT_FILE}
return $RETVAL
}
update_master_pg_hba(){
# Updatepg_hba.conf for master.
- cat ${hawq_data_directory}/${PG_HBA} |grep '^#' > ${TMP_PG_HBA}
+ ${CAT} ${hawq_data_directory}/${PG_HBA} |${GREP} '^#' > ${TMP_PG_HBA}
mv ${TMP_PG_HBA} ${hawq_data_directory}/${PG_HBA}
# Setting local access"
- echo "local all $USER ident" >> ${hawq_data_directory}/${PG_HBA}
- # echo "[INFO]:-Setting local host access"
- echo "host all $USER 127.0.0.1/28 trust" >> ${hawq_data_directory}/${PG_HBA}
- MASTER_IPV6_LOCAL_ADDRESS_ALL=(`/sbin/ip -6 address show |grep inet6|awk '{print $2}' |cut -d'/' -f1`)
- MASTER_HBA_IP_ADDRESS=(`echo ${master_ip_address_all[@]} ${MASTER_IPV6_LOCAL_ADDRESS_ALL[@]} ${standby_ip_address_all[@]}|tr ' ' '\n'|sort -u|tr '\n' ' '`)
+ ${ECHO} "local all $USER ident" >> ${hawq_data_directory}/${PG_HBA}
+ # ${ECHO} "[INFO]:-Setting local host access"
+ ${ECHO} "host all $USER 127.0.0.1/28 trust" >> ${hawq_data_directory}/${PG_HBA}
+ MASTER_IPV6_LOCAL_ADDRESS_ALL=(`/sbin/ip -6 address show |${GREP} inet6|${AWK} '{print $2}' |cut -d'/' -f1`)
+ MASTER_HBA_IP_ADDRESS=(`${ECHO} ${master_ip_address_all[@]} ${MASTER_IPV6_LOCAL_ADDRESS_ALL[@]} ${standby_ip_address_all[@]}|tr ' ' '\n'|sort -u|tr '\n' ' '`)
for ip_address in ${MASTER_HBA_IP_ADDRESS[@]}; do
CIDR_MASTER_IP=$(GET_CIDRADDR ${ip_address})
- CHK_COUNT=`grep -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
+ CHK_COUNT=`${GREP} -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
if [ "$CHK_COUNT" -eq "0" ];then
- echo "host all ${USER} ${CIDR_MASTER_IP} trust" >> ${hawq_data_directory}/${PG_HBA}
+ ${ECHO} "host all ${USER} ${CIDR_MASTER_IP} trust" >> ${hawq_data_directory}/${PG_HBA}
else
- echo "${CIDR_MASTER_IP} already exist in ${hawq_data_directory}/${PG_HBA}"
+ ${ECHO} "${CIDR_MASTER_IP} already exist in ${hawq_data_directory}/${PG_HBA}"
fi
done
}
update_standby_pg_hba(){
# Updatepg_hba.conf for standby master.
- echo "host all all 0.0.0.0/0 trust" >> ${hawq_data_directory}/${PG_HBA}
+ ${ECHO} "host all all 0.0.0.0/0 trust" >> ${hawq_data_directory}/${PG_HBA}
}
update_segment_pg_hba(){
# Updatepg_hba.conf for segment.
# Setting local access"
- MASTERS_HBA_IP_ADDRESSES=(`echo ${master_ip_address_all[@]} ${standby_ip_address_all[@]}|tr ' ' '\n'|sort -u|tr '\n' ' '`)
+ MASTERS_HBA_IP_ADDRESSES=(`${ECHO} ${master_ip_address_all[@]} ${standby_ip_address_all[@]}|tr ' ' '\n'|sort -u|tr '\n' ' '`)
for ip_address in ${MASTERS_HBA_IP_ADDRESSES[@]}; do
CIDR_MASTER_IP=$(GET_CIDRADDR ${ip_address})
- CHK_COUNT=`grep -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
+ CHK_COUNT=`${GREP} -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
if [ "$CHK_COUNT" -eq "0" ];then
- echo "host all all ${CIDR_MASTER_IP} trust" >> ${hawq_data_directory}/${PG_HBA}
+ ${ECHO} "host all all ${CIDR_MASTER_IP} trust" >> ${hawq_data_directory}/${PG_HBA}
fi
done
for ip_address in ${segment_ip_address_all[@]}; do
CIDR_MASTER_IP=$(GET_CIDRADDR ${ip_address})
- CHK_COUNT=`grep -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
+ CHK_COUNT=`${GREP} -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
if [ "$CHK_COUNT" -eq "0" ];then
- echo "host all ${USER} ${CIDR_MASTER_IP} trust" >> ${hawq_data_directory}/${PG_HBA}
+ ${ECHO} "host all ${USER} ${CIDR_MASTER_IP} trust" >> ${hawq_data_directory}/${PG_HBA}
fi
done
}
@@ -181,145 +187,189 @@ master_init() {
--shared_buffers=${shared_buffers} --backend_output=${log_dir}/master.initdb 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Master postgres initdb failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Master postgres initdb failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
update_master_pg_hba 1>>${MASTER_LOG_FILE} 2>&1
- ${GPHOME}/bin/pg_ctl -D ${hawq_data_directory} -l ${hawq_data_directory}/pg_log/startup.log -w -t 60 -o " -p ${hawq_port} --silent-mode=true -M master -i" start >> ${MASTER_LOG_FILE}
+ ${PG_CTL} -D ${hawq_data_directory} -l ${hawq_data_directory}/pg_log/startup.log -w -t 60 -o " -p ${hawq_port} --silent-mode=true -M master -i" start >> ${MASTER_LOG_FILE}
if [ $? -ne 0 ] ; then
- echo "Start hawq master failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Start hawq master failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "create filespace dfs_system on hdfs ('${dfs_url}');" 1>>${MASTER_LOG_FILE} 2>&1
+ $PSQL -p ${hawq_port} -d template1 -c "create filespace dfs_system on hdfs ('${dfs_url}');" 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Create filespace failed, please check your hdfs settings" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Create filespace failed, please check your hdfs settings" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "create tablespace dfs_default filespace dfs_system;" 1>>${MASTER_LOG_FILE} 2>&1
+ $PSQL -p ${hawq_port} -d template1 -c "create tablespace dfs_default filespace dfs_system;" 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Create tablespace failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Create tablespace failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- env PGOPTIONS="-c gp_session_role=utility" ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c \
+ env PGOPTIONS="-c gp_session_role=utility" $PSQL -p ${hawq_port} -d template1 -c \
"SET allow_system_table_mods='dml';UPDATE pg_database SET dat2tablespace = (SELECT oid FROM pg_tablespace WHERE spcname = 'dfs_default') WHERE datname = 'template1';" 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Configure database template1 failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Configure database template1 failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "create database template0 tablespace dfs_default template template1;" 1>>${MASTER_LOG_FILE} 2>&1
+ $PSQL -p ${hawq_port} -d template1 -c "create database template0 tablespace dfs_default template template1;" 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Create database template0 failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Create database template0 failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- env PGOPTIONS="-c gp_session_role=utility" ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "SET allow_system_table_mods='dml';UPDATE pg_database SET datistemplate = 't', datallowconn = false WHERE datname = 'template0';" 1>>${MASTER_LOG_FILE} 2>&1
+ env PGOPTIONS="-c gp_session_role=utility" $PSQL -p ${hawq_port} -d template1 -c "SET allow_system_table_mods='dml';UPDATE pg_database SET datistemplate = 't', datallowconn = false WHERE datname = 'template0';" 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Configure database template0 failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Configure database template0 failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "create database postgres tablespace dfs_default;" 1>>${MASTER_LOG_FILE} 2>&1
+ $PSQL -p ${hawq_port} -d template1 -c "create database postgres tablespace dfs_default;" 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Create database postgres failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Create database postgres failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- env PGOPTIONS="-c gp_session_role=utility" ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "SET allow_system_table_mods='dml';UPDATE pg_database SET datistemplate = 't' WHERE datname = 'postgres';" 1>>${MASTER_LOG_FILE} 2>&1
+ env PGOPTIONS="-c gp_session_role=utility" $PSQL -p ${hawq_port} -d template1 -c "SET allow_system_table_mods='dml';UPDATE pg_database SET datistemplate = 't' WHERE datname = 'postgres';" 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Configure database postgres failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Configure database postgres failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- env PGOPTIONS="-c gp_session_role=utility" ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "CHECKPOINT;" 1>>${MASTER_LOG_FILE} 2>&1
+ env PGOPTIONS="-c gp_session_role=utility" $PSQL -p ${hawq_port} -d template1 -c "CHECKPOINT;" 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "CHECKPOINT failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "CHECKPOINT failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
LOAD_GP_TOOLKIT
if [ $? -ne 0 ] ; then
- echo "Load TOOLKIT failed" | tee -a ${MASTER_LOG_FILE}
+ ${ECHO} "Load TOOLKIT failed" | tee -a ${MASTER_LOG_FILE}
exit 1
fi
- ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "alter user \"${USER}\" password 'gparray';" 1>>${MASTER_LOG_FILE} 2>&1
+ $PSQL -p ${hawq_port} -d template1 -c "alter user \"${USER}\" password 'gparray';" 1>>${MASTER_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Alter user failed" 1>> ${MASTER_LOG_FILE} 2>&1
+ ${ECHO} "Alter user failed" 1>> ${MASTER_LOG_FILE} 2>&1
exit 1
fi
}
standby_init() {
# Make sure the log file is created.
- ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} \
- "if [ ! -d ${log_dir} ]; then echo \"Try to create log directory for standby master.\"; mkdir -p ${log_dir}; fi"
- ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} \
- "if [ ! -f ${STANDBY_LOG_FILE} ]; then touch ${STANDBY_LOG_FILE}; fi"
- STANDBY_IP_ADDRESSES=`ping -c1 -n ${standby_host_name} | head -n1 | sed 's/.*(\([0-9]*\.[0-9]*\.[0-9]*\.[0-9]*\)).*/\1/g'`
- echo "Try to stop HAWQ cluster" 1>>${STANDBY_LOG_FILE}
- source $GPHOME/greenplum_path.sh
- # Stop hawq cluster before add new standby master.
- hawq stop cluster -a >> ${STANDBY_LOG_FILE}
- # Check if data directory are exist and keep clean.
- ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} \
- "if [ ! -d ${master_data_directory} ]; then echo \"Data directory ${master_data_directory} is not exist, please create it.\"; exit 1; fi"
- if [ $? -ne 0 ] ; then
- echo "Standby master data directory check failed" | tee -a ${STANDBY_LOG_FILE}
+ if [ ! -f ${STANDBY_LOG_FILE} ]; then
+ touch ${STANDBY_LOG_FILE};
+ fi
+
+ LOG_MSG ""
+ LOG_MSG "[INFO]:-Stopping HAWQ cluster"
+ ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "${SOURCE_PATH}; hawq stop allsegments -a -M fast;" >> ${STANDBY_LOG_FILE} 2>&1
+ if [ $? -ne 0 ] ; then
+ LOG_MSG "[ERROR]:-Stop segments failed" verbose
exit 1
+ else
+ LOG_MSG "[INFO]:-HAWQ segments stopped" verbose
fi
- ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} \
- "if [ \"\$(ls -A ${hawq_data_directory})\" ] && [ \"${hawq_data_directory}\" != \"\" ]; then echo \"Data directory ${hawq_data_directory} is not empty, please clean it.\"; exit 1; fi"
- if [ $? -ne 0 ] ; then
- echo "Standby master data directory check failed" | tee -a ${STANDBY_LOG_FILE}
+
+ ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "${SOURCE_PATH}; hawq stop master -a -M fast;" >> ${STANDBY_LOG_FILE} 2>&1
+ if [ $? -ne 0 ] ; then
+ LOG_MSG "[ERROR]:-Stop master failed" verbose
exit 1
+ else
+ LOG_MSG "[INFO]:-HAWQ master stopped" verbose
fi
- pushd ${master_data_directory} >> ${STANDBY_LOG_FILE}
+
# Sync data directories to standby master.
- echo "Sync master files to standby from master" >> ${STANDBY_LOG_FILE}
- tar cf - * --exclude="pg_log" --exclude="db_dumps" --exclude="gpperfmon/data" \
- | ssh ${standby_host_name} tar xf - -C ${master_data_directory} 1>>${STANDBY_LOG_FILE}
- if [ $? -ne 0 ] ; then
- echo "Sync master files to standby failed" | tee -a ${STANDBY_LOG_FILE}
+ LOG_MSG "[INFO]:-Sync files to standby from master"
+ ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "cd ${master_data_directory}; \
+ ${SOURCE_PATH}; ${GPHOME}/bin/lib/pysync.py -x gpperfmon/data -x pg_log -x db_dumps \
+ ${master_data_directory} ${standby_host_name}:${master_data_directory};" >> ${STANDBY_LOG_FILE} 2>&1
+ if [ $? -ne 0 ] ; then
+ LOG_MSG "[FATAL]:-Sync master files to standby failed" verbose
exit 1
fi
+
+ ${MKDIR} -p ${master_data_directory}/pg_log | tee -a ${STANDBY_LOG_FILE}
+
+ STANDBY_IP_ADDRESSES=`${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "${PING} -c1 -n ${standby_host_name} | head -n1 | sed 's/.*(\([0-9]*\.[0-9]*\.[0-9]*\.[0-9]*\)).*/\1/g';"`
+ if [ -z "${STANDBY_IP_ADDRESSES}" ] ; then
+ LOG_MSG "[FATAL]:-Standby ip address is empty" verbose
+ exit 1
+ else
+ LOG_MSG "[INFO]:-Standby ip address is ${STANDBY_IP_ADDRESSES}" verbose
+ fi
+
- ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} "mkdir -p ${master_data_directory}/pg_log | tee -a ${STANDBY_LOG_FILE};"
- hawq start standby -a >> ${STANDBY_LOG_FILE}
- hawq stop standby -a >> ${STANDBY_LOG_FILE}
-
- hawq start master -a >> ${STANDBY_LOG_FILE}
- env PGOPTIONS="-c gp_session_role=utility" psql -p ${master_port} -d template1 -c"select gp_remove_master_standby() where (select count(*) from gp_segment_configuration where role='s') = 1;" >> ${STANDBY_LOG_FILE} 2>&1
- env PGOPTIONS="-c gp_session_role=utility" psql -p ${master_port} -d template1 -c \
- "select gp_add_master_standby('${standby_host_name}','${STANDBY_IP_ADDRESSES}','');" 1>>${STANDBY_LOG_FILE} 2>&1
- if [ $? -ne 0 ] ; then
- echo "Register standby infomation failed" | tee -a ${STANDBY_LOG_FILE}
+ LOG_MSG "[INFO]:-Start hawq master" verbose
+ ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "${SOURCE_PATH}; hawq start master -a --masteronly >> ${STANDBY_LOG_FILE}"
+ if [ $? -ne 0 ] ; then
+ LOG_MSG "[ERROR]:-Start HAWQ master failed" verbose
+ exit 1
+ else
+ LOG_MSG "[INFO]:-HAWQ master started" verbose
+ fi
+
+ LOG_MSG "[INFO]:-Try to remove existing standby from catalog" verbose
+ ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "${SOURCE_PATH}; env PGOPTIONS=\"-c gp_session_role=utility\" $PSQL -p ${master_port} -d template1 \
+ -c\"select gp_remove_master_standby() where (select count(*) from gp_segment_configuration where role='s') = 1;\";" >> ${STANDBY_LOG_FILE} 2>&1
+
+ ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "${SOURCE_PATH}; env PGOPTIONS=\"-c gp_session_role=utility\" $PSQL -p ${master_port} -d template1 -c \
+ \"select gp_add_master_standby('${standby_host_name}','${STANDBY_IP_ADDRESSES}','');\";" >>${STANDBY_LOG_FILE} 2>&1
+ if [ $? -ne 0 ] ; then
+ LOG_MSG "[FATAL]:-Register standby infomation failed" verbose
+ exit 1
+ else
+ LOG_MSG "[INFO]:-Register standby to master successfully" verbose
+ fi
+
+ ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "${SOURCE_PATH}; hawq stop master -a -M fast;" >> ${STANDBY_LOG_FILE}
+ if [ $? -ne 0 ] ; then
+ LOG_MSG "[ERROR]:-Stop HAWQ master failed" verbose
exit 1
+ else
+ LOG_MSG "[INFO]:-HAWQ master stopped" verbose
fi
- hawq stop master -a >> ${STANDBY_LOG_FILE}
- hawq start cluster -a >> ${STANDBY_LOG_FILE}
- env PGOPTIONS="-c gp_session_role=utility" psql -p ${master_port} -d template1 -c"select * from gp_segment_configuration;" 1>>${STANDBY_LOG_FILE} 2>&1
- popd >> ${STANDBY_LOG_FILE}
+ ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "${SOURCE_PATH}; hawq start cluster -a;" >> ${STANDBY_LOG_FILE}
+ if [ $? -ne 0 ] ; then
+ LOG_MSG "[ERROR]:-Start HAWQ cluster failed" verbose
+ exit 1
+ else
+ LOG_MSG "[INFO]:-HAWQ cluster started" verbose
+ fi
+
+ ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+ "${SOURCE_PATH}; env PGOPTIONS=\"-c gp_session_role=utility\" $PSQL -p ${master_port} -d template1 \
+ -c\"select * from gp_segment_configuration;\";" >>${STANDBY_LOG_FILE} 2>&1
}
segment_init() {
source ${GPHOME}/greenplum_path.sh
- for tmp_path in `echo ${hawqSegmentTemp} | sed 's|,| |g'`; do
+ for tmp_path in `${ECHO} ${hawqSegmentTemp} | sed 's|,| |g'`; do
if [ ! -d ${tmp_path} ]; then
- echo "Temp directory is not exist, please create it" | tee -a ${SEGMENT_LOG_FILE}
- echo "Segment init failed on ${HOSTNAME}"
+ ${ECHO} "Temp directory is not exist, please create it" | tee -a ${SEGMENT_LOG_FILE}
+ ${ECHO} "Segment init failed on ${HOSTNAME}"
exit 1
else
if [ ! -w "${tmp_path}" ]; then
- echo "Do not have write permission to temp directory, please check" | tee -a ${SEGMENT_LOG_FILE}
- echo "Segment init failed on ${HOSTNAME}"
+ ${ECHO} "Do not have write permission to temp directory, please check" | tee -a ${SEGMENT_LOG_FILE}
+ ${ECHO} "Segment init failed on ${HOSTNAME}"
exit 1
fi
fi
@@ -332,18 +382,18 @@ segment_init() {
--shared_buffers=${shared_buffers} --backend_output=${log_dir}/segment.initdb 1>>${SEGMENT_LOG_FILE} 2>&1
if [ $? -ne 0 ] ; then
- echo "Postgres initdb failed" | tee -a ${SEGMENT_LOG_FILE}
- echo "Segment init failed on ${HOSTNAME}"
+ ${ECHO} "Postgres initdb failed" | tee -a ${SEGMENT_LOG_FILE}
+ ${ECHO} "Segment init failed on ${HOSTNAME}"
exit 1
fi
update_segment_pg_hba 1>>${SEGMENT_LOG_FILE} 2>&1
- ${GPHOME}/bin/pg_ctl -D ${hawq_data_directory} -l ${hawq_data_directory}/pg_log/startup.log -w -t 60 -o \
+ ${PG_CTL} -D ${hawq_data_directory} -l ${hawq_data_directory}/pg_log/startup.log -w -t 60 -o \
" -p ${hawq_port} --silent-mode=true -M segment -i" start >> ${SEGMENT_LOG_FILE}
if [ $? -ne 0 ] ; then
- echo "Segment init failed on ${HOSTNAME}" | tee -a ${SEGMENT_LOG_FILE}
+ ${ECHO} "Segment init failed on ${HOSTNAME}" | tee -a ${SEGMENT_LOG_FILE}
exit 1
fi
}
@@ -353,18 +403,18 @@ check_data_directorytory() {
default_mdd=~/hawq-data-directory/masterdd
default_sdd=~/hawq-data-directory/segmentdd
if [ "${hawq_data_directory}" = "${default_mdd}" ]; then
- mkdir -p ${default_mdd}
+ ${MKDIR} -p ${default_mdd}
elif [ "${hawq_data_directory}" = "${default_sdd}" ]; then
- mkdir -p ${default_sdd}
+ ${MKDIR} -p ${default_sdd}
fi
# Check that the data directory already exists and is empty.
if [ -d ${hawq_data_directory} ]; then
if [ "$(ls -A ${hawq_data_directory})" ] && [ "${hawq_data_directory}" != "" ]; then
- echo "Data directory ${hawq_data_directory} is not empty on ${HOSTNAME}"
+ ${ECHO} "Data directory ${hawq_data_directory} is not empty on ${HOSTNAME}"
exit 1
fi
else
- echo "Data directory ${hawq_data_directory} does not exist, please create it"
+ ${ECHO} "Data directory ${hawq_data_directory} does not exist, please create it"
exit 1
fi
}
@@ -373,11 +423,11 @@ check_temp_directory() {
# Check that each temp directory exists and is writable.
for tmp_dir in ${tmp_dir_list}; do
if [ ! -d ${tmp_dir} ]; then
- echo "Temporary directory ${tmp_dir} does not exist, please create it"
+ ${ECHO} "Temporary directory ${tmp_dir} does not exist, please create it"
exit 1
fi
if [ ! -w ${tmp_dir} ]; then
- echo "Temporary directory ${tmp_dir} is not writable, exit." ;
+ ${ECHO} "Temporary directory ${tmp_dir} is not writable, exit." ;
exit 1
fi
done
@@ -389,13 +439,14 @@ if [ ${object_type} == "master" ]; then
check_temp_directory
master_init
elif [ ${object_type} == "standby" ]; then
+ check_data_directorytory
standby_init
elif [ ${object_type} == "segment" ]; then
check_data_directorytory
check_temp_directory
segment_init
else
- echo "Please input correct node type"
+ ${ECHO} "Please input correct node object"
exit 1
fi
exit 0
[2/2] incubator-hawq git commit: HAWQ-40. Refact hawq standby init.
Updated GUC name.
Posted by rl...@apache.org.
HAWQ-40. Refact hawq standby init. Updated GUC name.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/2561ab74
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/2561ab74
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/2561ab74
Branch: refs/heads/master
Commit: 2561ab74d9bcbe45c33d148d18ba0a7204cb80a4
Parents: d0c9c1d
Author: rlei <rl...@pivotal.io>
Authored: Fri Nov 13 11:45:25 2015 +0800
Committer: stanlyxiang <st...@gmail.com>
Committed: Fri Nov 13 11:55:27 2015 +0800
----------------------------------------------------------------------
tools/bin/hawq_ctl | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2561ab74/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index 4890cd4..334e78a 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -148,12 +148,12 @@ class HawqInit:
return result
def set_vsegment_num_per_node(self):
- cmd = "%s; hawq config -c hawq_resourcemanager_query_vsegment_number_per_segment_limit \
+ cmd = "%s; hawq config -c hawq_rm_nvseg_perquery_perseg_limit \
-v %s --skipvalidation -q > /dev/null" % \
(source_hawq_env, self.vseg_num_per_node)
result = local_ssh(cmd, logger)
if result != 0:
- logger.warn("Set hawq_resourcemanager_query_vsegment_number_per_segment_limit failed")
+ logger.warn("Set hawq_rm_nvseg_perquery_perseg_limit failed")
return result
def _get_master_init_cmd(self):
@@ -307,7 +307,7 @@ class HawqInit:
else:
logger.info("Segments init successfully on nodes '%s'" % self.host_list)
- if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
+ if self.standby_host_name.lower() not in ('', 'none'):
check_return_code(self._init_standby(), logger, \
"Init standby failed, exit", \
"Init standby successfully")
@@ -329,7 +329,7 @@ class HawqInit:
logger.info("Try to remove standby master")
self.hawq_remove_standby()
elif self.node_type == "standby":
- if self.standby_host_name in ('', 'None', 'none', 'NONE'):
+ if self.standby_host_name.lower() in ('', 'none'):
logger.info("No standby host found")
logger.info("Please check your standby host name")
sys.exit(1)
@@ -452,7 +452,7 @@ class HawqStart:
def _start_all_nodes(self):
logger.info("Start all the nodes in hawq cluster")
- if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
+ if self.standby_host_name.lower() not in ('', 'none'):
logger.info("Starting standby master '%s'" % self.standby_host_name)
check_return_code(self.start_standby(), logger, "Standby master start failed, exit",
"Standby master started successfully")
@@ -630,7 +630,7 @@ class HawqStop:
logger.error("Master stop failed")
else:
logger.info("Master stopped successfully")
- if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
+ if self.standby_host_name.lower() not in ('', 'none'):
result = self._stop_standby()
if result != 0:
logger.error("Standby master stop failed")