You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by rl...@apache.org on 2015/11/13 04:56:05 UTC

[1/2] incubator-hawq git commit: HAWQ-40. Refactor hawq standby init. Change the standby start sequence and run init directly on the standby host.

Repository: incubator-hawq
Updated Branches:
  refs/heads/master 8c633bf53 -> 2561ab74d


HAWQ-40. Refactor hawq standby init. Change the standby start sequence and run init directly on the standby host.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/d0c9c1de
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/d0c9c1de
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/d0c9c1de

Branch: refs/heads/master
Commit: d0c9c1de4ab4eb8d5a6fe44345cda28a70486c81
Parents: 8c633bf
Author: rlei <rl...@pivotal.io>
Authored: Fri Nov 13 11:08:56 2015 +0800
Committer: stanlyxiang <st...@gmail.com>
Committed: Fri Nov 13 11:55:26 2015 +0800

----------------------------------------------------------------------
 tools/bin/hawq_ctl                   | 157 +++++++++-------
 tools/bin/hawqpylib/hawqlib.py       |   6 +-
 tools/bin/lib/hawq_bash_functions.sh | 132 +++++++++++++
 tools/bin/lib/hawqinit.sh            | 301 +++++++++++++++++-------------
 4 files changed, 404 insertions(+), 192 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d0c9c1de/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index 880451a..4890cd4 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -66,7 +66,7 @@ class HawqInit:
                 if item in self.hawq_dict:
                     logger.info("Check: %s is set" % item)
                 else:
-                    sys.exit("Check: %s not configured in hawq-site.xml." % item)
+                    sys.exit("Check: %s not configured in hawq-site.xml" % item)
 
         self.master_host_name = self.hawq_dict['hawq_master_address_host']
         self.master_port = self.hawq_dict['hawq_master_address_port']
@@ -88,9 +88,13 @@ class HawqInit:
             self.standby_host_name = self.hawq_dict['hawq_standby_address_host']
             self.standby_port = self.master_port
             self.standby_address = self.standby_host_name + ":" + self.standby_port
+            if self.standby_host_name in (self.master_host_name, 'localhost', '127.0.0.1'):
+                logger.error("Standby host should not be the same as master host")
+                sys.exit(1)
         else:
             logger.info("No standby host configured, skip it")
             self.standby_host_name = ''
+
         if 'enable_secure_filesystem' in self.hawq_dict:
             self.enable_secure_filesystem=self.hawq_dict['enable_secure_filesystem']
             self.krb_server_keyfile =self.hawq_dict['krb_server_keyfile']
@@ -131,16 +135,16 @@ class HawqInit:
     def check_hdfs_path(self):
         cmd = "%s/bin/gpcheckhdfs hdfs %s %s %s" % \
               (self.GPHOME, self.dfs_url, self.enable_secure_filesystem, self.krb_server_keyfile)
-        logger.info("Check if hdfs path is available.")
+        logger.info("Check if hdfs path is available")
         logger.debug("Check hdfs: %s" % cmd)
-        check_return_code(local_ssh(cmd, logger, warning = True), logger, "Check hdfs failed, please verify your hdfs settings.")
+        check_return_code(local_ssh(cmd, logger, warning = True), logger, "Check hdfs failed, please verify your hdfs settings")
 
     def set_total_vsegment_num(self):
         cmd = "%s; hawq config -c default_segment_num -v %s --skipvalidation -q > /dev/null" % \
                (source_hawq_env, self.total_vseg_num)
         result = local_ssh(cmd, logger)
         if result != 0:
-            logger.warn("Set default_segment_num failed.")
+            logger.warn("Set default_segment_num failed")
         return result
 
     def set_vsegment_num_per_node(self):
@@ -149,7 +153,7 @@ class HawqInit:
               (source_hawq_env, self.vseg_num_per_node)
         result = local_ssh(cmd, logger)
         if result != 0:
-            logger.warn("Set hawq_resourcemanager_query_vsegment_number_per_segment_limit failed.")
+            logger.warn("Set hawq_resourcemanager_query_vsegment_number_per_segment_limit failed")
         return result
 
     def _get_master_init_cmd(self):
@@ -163,7 +167,7 @@ class HawqInit:
         return cmd
 
     def hawq_remove_standby(self):
-        """Removes the standby master."""
+        """Removes the standby master"""
         running_standby_host = ''
 
         try:
@@ -173,7 +177,7 @@ class HawqInit:
             rows = dbconn.execSQL(conn, query)
             conn.close()
         except DatabaseError, ex:
-            logger.error("Failed to connect to database, this script can only be run when the database is up.")
+            logger.error("Failed to connect to database, this script can only be run when the database is up")
             sys.exit(1)
 
         for row in rows:
@@ -183,12 +187,14 @@ class HawqInit:
         if running_standby_host:
             logger.info("running standby host is %s" % running_standby_host)
             signal.signal(signal.SIGINT,signal.SIG_IGN)
-            cmd = "%s; hawq stop cluster -a -q" % source_hawq_env
-            logger.info("Stop HAWQ cluster.")
-            check_return_code(local_ssh(cmd, logger), logger, "Stop HAWQ cluster failed, exit.")
-            logger.info("Start HAWQ master.")
+            logger.info("Stop HAWQ cluster")
+            cmd = "%s; hawq stop master -a -q" % source_hawq_env
+            check_return_code(local_ssh(cmd, logger), logger, "Stop HAWQ master failed, exit")
+            cmd = "%s; hawq stop allsegments -a -q" % source_hawq_env
+            check_return_code(local_ssh(cmd, logger), logger, "Stop HAWQ segments failed, exit")
+            logger.info("Start HAWQ master")
             cmd = "%s; hawq start master -m -q" % source_hawq_env
-            check_return_code(local_ssh(cmd, logger), logger, "Start HAWQ master failed, exit.")
+            check_return_code(local_ssh(cmd, logger), logger, "Start HAWQ master failed, exit")
 
             try:
                 logger.info('Remove standby from Database catalog.')
@@ -201,15 +207,15 @@ class HawqInit:
                 #conn.close()
                 cmd = 'env PGOPTIONS="-c gp_session_role=utility" %s/bin/psql -p %s -d template1 -c \
                     "select gp_remove_master_standby();"' % (self.GPHOME, self.master_port)
-                local_ssh(cmd, logger)
-                logger.info('Database catalog updated successfully.')
-                logger.info("Stop HAWQ master.")
+                check_return_code(local_ssh(cmd, logger), logger, \
+                                  "Update catalog failed, exit", "Catalog updated successfully.")
+                logger.info("Stop HAWQ master")
                 cmd = "%s; hawq stop master -a" % source_hawq_env
-                check_return_code(local_ssh(cmd, logger), logger, "Stop hawq master failed, exit.")
+                check_return_code(local_ssh(cmd, logger), logger, "Stop hawq master failed, exit")
             except DatabaseError, ex:
-                logger.error("Failed to connect to database, this script can only be run when the database is up.")
+                logger.error("Failed to connect to database, this script can only be run when the database is up")
                 cmd = "%s; hawq stop master -a" % source_hawq_env
-                check_return_code(local_ssh(cmd, logger), logger, "Stop hawq master failed, exit.")
+                check_return_code(local_ssh(cmd, logger), logger, "Stop hawq master failed, exit")
             remove_property_xml("hawq_standby_address_host", "%s/etc/hawq-site.xml" % self.GPHOME)
             host_list = parse_hosts_file(self.GPHOME)
             sync_hawq_site(self.GPHOME, host_list)
@@ -226,24 +232,42 @@ class HawqInit:
             logger.debug("rm -rf %s %s" % (self.master_data_directory, tmp_dir_list))
             cmd = "rm -rf %s %s" % (self.master_data_directory, tmp_dir_list)
             check_return_code(remote_ssh(cmd, self.standby_host_name, self.user), logger, \
-                              "Delete standby master's directories failed, exit.")
+                              "Delete standby master's directories failed, exit")
             signal.signal(signal.SIGINT,signal.default_int_handler)
-            logger.info('Remove standby master finished.')
+            logger.info('Remove standby master finished')
         else:
-            logger.info("Do not find a running standby master.")
+            logger.info("Do not find a running standby master")
+
+    def _init_standby(self):
+        """Copy _mgmt_config and slaves to the standby host, run standby init there, return the exit code."""
+        logger.info("Start to init standby master: '%s'" % self.standby_host_name)
+        logger.info("This might take a couple of minutes, please wait...")
+        # Sync config files from master: each scp is issued on the master host through remote_ssh.
+        scpcmd = "scp %s/etc/_mgmt_config %s:%s/etc/_mgmt_config > /dev/null" % \
+                 (self.GPHOME, self.standby_host_name, self.GPHOME)
+        check_return_code(remote_ssh(scpcmd, self.master_host_name, self.user), \
+                          logger, "Sync _mgmt_config failed")
+        scpcmd = "scp %s/etc/slaves %s:%s/etc/slaves > /dev/null" % \
+                 (self.GPHOME, self.standby_host_name, self.GPHOME)
+        check_return_code(remote_ssh(scpcmd, self.master_host_name, self.user), \
+                          logger, "Sync slaves file failed")
+        # Hand back the raw exit code: callers run check_return_code on it with their own messages.
+        standby_init_cmd = self._get_standby_init_cmd()
+        return remote_ssh_nowait(standby_init_cmd, self.standby_host_name, self.user)
+
 
     def _resync_standby(self):
-        logger.info("Re-sync standby.")
+        logger.info("Re-sync standby")
         cmd = "%s; hawq stop cluster -a" % source_hawq_env
-        check_return_code(local_ssh(cmd, logger), logger, "Stop hawq cluster failed, exit.")
+        check_return_code(local_ssh(cmd, logger), logger, "Stop hawq cluster failed, exit")
         cmd = "cd %s; %s; %s/bin/lib/pysync.py -x gpperfmon/data -x pg_log -x db_dumps %s %s:%s" % \
                  (self.master_data_directory, source_hawq_env,  self.GPHOME, self.master_data_directory,
                   self.standby_host_name, self.master_data_directory)
         result = local_ssh(cmd, logger)
-        check_return_code(result, logger, "Re-sync standby master failed, exit.")
+        check_return_code(result, logger, "Re-sync standby master failed, exit")
         cmd = "%s; hawq start cluster -a" % source_hawq_env
         result = local_ssh(cmd, logger)
-        check_return_code(result, logger, "Start hawq cluster failed.")
+        check_return_code(result, logger, "Start hawq cluster failed")
 
         return result
         
@@ -284,18 +308,10 @@ class HawqInit:
             logger.info("Segments init successfully on nodes '%s'" % self.host_list)
 
         if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
-            logger.info("Start to init standby master: '%s'" % self.standby_host_name)
-            logger.info("This might take a couple of minutes, please wait...")
-            standby_init_cmd = self._get_standby_init_cmd()
-            scpcmd = "scp %s/etc/_mgmt_config %s:%s/etc/_mgmt_config > /dev/null" % \
-                     (self.GPHOME, self.standby_host_name, self.GPHOME)
-            local_ssh(scpcmd)
-            scpcmd = "scp %s/etc/slaves %s:%s/etc/slaves > /dev/null" % \
-                     (self.GPHOME, self.standby_host_name, self.GPHOME)
-            local_ssh(scpcmd)
-            check_return_code(remote_ssh(standby_init_cmd, self.master_host_name, self.user), logger, \
-                              "Standby master init failed, exit", "Standby master init successfully")
-            logger.info("HAWQ cluster init successfully")
+            check_return_code(self._init_standby(), logger, \
+                              "Init standby failed, exit", \
+                              "Init standby successfully")
+        logger.info("Init HAWQ cluster successfully")
         return None
 
     def run(self):
@@ -313,16 +329,18 @@ class HawqInit:
             logger.info("Try to remove standby master")
             self.hawq_remove_standby()
         elif self.node_type == "standby":
-            logger.info("Start to init standby master")
-            logger.info("This might take couple minutes, please wait...")
+            if self.standby_host_name in ('', 'None', 'none', 'NONE'):
+                logger.info("No standby host found")
+                logger.info("Please check your standby host name")
+                sys.exit(1)
             if self.no_update:
                 check_return_code(self._resync_standby(), logger, \
                                   "Standby master re-sync failed, exit", \
                                   "Standby master re-sync successfully")
             else:
-                cmd = self._get_standby_init_cmd()
-                check_return_code(local_ssh(cmd, logger), logger, "Standby master init failed, exit", \
-                                  "Standby master init successfully")
+                check_return_code(self._init_standby(), logger, \
+                                  "Init standby failed, exit", \
+                                  "Init standby successfully")
 
         elif self.node_type == "segment":
             cmd = self._get_segment_init_cmd()
@@ -333,7 +351,7 @@ class HawqInit:
             self.check_hdfs_path()
             self._init_cluster()
         else:
-            sys.exit('node_type should be in master/standby/segment/cluster')
+            sys.exit('hawq init object should be one of master/standby/segment/cluster')
         return None
 
 class HawqStart:
@@ -364,7 +382,7 @@ class HawqStart:
 
         for item in check_items:
             if item not in self.hawq_dict:
-                logger.error("Check: %s not configured in hawq-site.xml." % item)
+                logger.error("Check: %s not configured in hawq-site.xml" % item)
                 sys.exit()
 
         self.master_host_name = self.hawq_dict['hawq_master_address_host']
@@ -433,14 +451,16 @@ class HawqStart:
 
     def _start_all_nodes(self):
         logger.info("Start all the nodes in hawq cluster")
-        logger.info("Starting master node '%s'" % self.master_host_name)
-        check_return_code(self.start_master(), logger, "Master start failed, exit", \
-                          "Master started successfully")
 
         if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
             logger.info("Starting standby master '%s'" % self.standby_host_name)
             check_return_code(self.start_standby(), logger, "Standby master start failed, exit", 
                               "Standby master started successfully")
+
+        logger.info("Starting master node '%s'" % self.master_host_name)
+        check_return_code(self.start_master(), logger, "Master start failed, exit", \
+                          "Master started successfully")
+
         segment_cmd_str = self._start_segment_cmd()
         logger.info("Start segments in list: %s" % self.host_list)
         work_list = []
@@ -455,6 +475,7 @@ class HawqStart:
             logger.error("Segments start failed")
         else:
             logger.info("Segments started successfully")
+            logger.info("HAWQ cluster started successfully")
         return node_init.return_flag
 
     def _start_all_segments(self):
@@ -531,7 +552,7 @@ class HawqStop:
 
         for item in check_items:
             if item not in self.hawq_dict:
-                sys.exit("Check: %s not configured in hawq-site.xml." % item)
+                sys.exit("Check: %s not configured in hawq-site.xml" % item)
 
         self.master_host_name = self.hawq_dict['hawq_master_address_host']
         self.master_port = self.hawq_dict['hawq_master_address_port']
@@ -743,13 +764,21 @@ def remote_ssh(cmd_str, host, user, q=None):
     result = subprocess.Popen(remote_cmd_str, shell=True, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
     stdout,stderr = result.communicate()
     if stdout and stdout != '':
-        logger.info(stdout)
-    if stderr and stdout != '':
-        logger.info(stderr)
+        logger.info(stdout.strip())
+    if stderr and stderr != '':
+        logger.info(stderr.strip())
     if q:
         q.put(("done", host, result.returncode))
     return result.returncode
 
+def remote_ssh_nowait(cmd, host, user):
+    """Run cmd on host over ssh without capturing its output; despite the name, block and return the exit status."""
+    # An empty user means no "user@" prefix is put in front of the host.
+    target = host if user == "" else "%s@%s" % (user, host)
+    remote_cmd_str = "ssh -o 'StrictHostKeyChecking no' %s \"%s\"" % (target, cmd)
+    # stdout/stderr are not redirected, so the remote command writes straight to this process's streams.
+    return subprocess.call(remote_cmd_str, shell=True)
+
 
 def check_progress(q, total_num, action, quiet=False):
     working_num = total_num
@@ -799,31 +828,31 @@ def hawq_activate_standby(opts, hawq_dict):
     cmd = "%s; hawq stop cluster -a -M fast;" % source_hawq_env
     result = local_ssh(cmd, logger)
     if result != 0:
-        logger.debug("Stop cluster failed, try to stop it immediately.")
+        logger.debug("Stop cluster failed, try to stop it immediately")
         cmd = "%s; hawq stop cluster -a -M immediate;" % source_hawq_env
-        check_return_code(local_ssh(cmd, logger), logger, "Stop cluster failed, exit.")
+        check_return_code(local_ssh(cmd, logger), logger, "Stop cluster failed, exit")
 
     cmd = "%s; hawq config -c hawq_master_address_host -v %s --skipvalidation -q" % \
            (source_hawq_env, hawq_dict['hawq_standby_address_host'])
-    check_return_code(local_ssh(cmd, logger), logger, "Set hawq_master_address_host failed.")
+    check_return_code(local_ssh(cmd, logger), logger, "Set hawq_master_address_host failed")
 
     cmd = "%s; hawq config -c hawq_standby_address_host -v %s --skipvalidation -q" % \
            (source_hawq_env, 'none')
-    check_return_code(local_ssh(cmd, logger), logger, "Set hawq_standby_address_host failed.")
+    check_return_code(local_ssh(cmd, logger), logger, "Set hawq_standby_address_host failed")
 
     cmd = '''echo "gp_persistent_repair_global_sequence = true" >> %s/%s''' % (hawq_dict['hawq_master_directory'], 'postgresql.conf')
-    check_return_code(local_ssh(cmd, logger), logger, "Set gp_persistent_repair_global_sequence = true failed.")
+    check_return_code(local_ssh(cmd, logger), logger, "Set gp_persistent_repair_global_sequence = true failed")
 
     cmd = "%s; hawq start master" % source_hawq_env
-    check_return_code(local_ssh(cmd, logger), logger, "Start master failed.")
+    check_return_code(local_ssh(cmd, logger), logger, "Start master failed")
 
     cmd = "%s; env PGOPTIONS=\"-c gp_session_role=utility\" psql -p %s -d template1 -c \"select gp_remove_master_standby()\
             where (select count(*) from gp_segment_configuration where role='s') = 1;\"" % (source_hawq_env, hawq_dict['hawq_master_address_port'])
     result = local_ssh(cmd, logger)
     cmd = "%s; hawq stop master -a" % source_hawq_env
-    check_return_code(local_ssh(cmd, logger), logger, "Stop master failed.")
+    check_return_code(local_ssh(cmd, logger), logger, "Stop master failed")
     cmd = "%s; hawq start cluster" % source_hawq_env
-    check_return_code(local_ssh(cmd, logger), logger, "Start cluster failed.")
+    check_return_code(local_ssh(cmd, logger), logger, "Start cluster failed")
     cmd = '''sed -i "/gp_persistent_repair_global_sequence/d" %s/%s''' % (hawq_dict['hawq_master_directory'], 'postgresql.conf')
     check_return_code(local_ssh(cmd, logger))
     return None
@@ -888,16 +917,16 @@ def create_parser():
                       dest="masteronly",
                       action="store_true",
                       default=False,
-                      help="Start hawq in utility mode.")
+                      help="Start hawq in utility mode")
     parser.add_option("-U", "--special-mode",
                       choices=['upgrade', 'maintenance'],
                       dest="special_mode",
-                      help="Start hawq in upgrade/maintenance mode.")
+                      help="Start hawq in upgrade/maintenance mode")
     parser.add_option("-R", "--restrict",
                       dest="restrict",
                       action="store_true",
                       default=False,
-                      help="Start hawq in restrict mode.")
+                      help="Start hawq in restrict mode")
     parser.add_option('-r', '--remove-standby', action='store_true',
                       dest='remove_standby', default=False,
                       help='Delete hawq standby master node.')
@@ -908,12 +937,12 @@ def create_parser():
                       type="int",
                       dest="virtual_seg_num",
                       default=8,
-                      help="Sets maximum number of virtual segments per node.")
+                      help="Sets maximum number of virtual segments per node")
     parser.add_option("--vsegment-number",
                       type="int",
                       dest="virtual_seg_num",
                       default=8,
-                      help="Sets maximum number of virtual segments per node.")
+                      help="Sets maximum number of virtual segments per node")
     parser.add_option("--locale",
                       dest="hawq_locale",
                       default="en_US.utf8",

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d0c9c1de/tools/bin/hawqpylib/hawqlib.py
----------------------------------------------------------------------
diff --git a/tools/bin/hawqpylib/hawqlib.py b/tools/bin/hawqpylib/hawqlib.py
index b5da22c..bf9168c 100755
--- a/tools/bin/hawqpylib/hawqlib.py
+++ b/tools/bin/hawqpylib/hawqlib.py
@@ -87,12 +87,12 @@ def local_ssh(cmd, logger = None, warning = False):
     stdout,stderr = result.communicate()
     if logger:
         if stdout != '':
-            logger.info(stdout)
+            logger.info(stdout.strip())
         if stderr != '':
             if not warning:
-                logger.error(stderr)
+                logger.error(stderr.strip())
             else:
-                logger.warn(stderr)
+                logger.warn(stderr.strip())
     return result.returncode
 
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d0c9c1de/tools/bin/lib/hawq_bash_functions.sh
----------------------------------------------------------------------
diff --git a/tools/bin/lib/hawq_bash_functions.sh b/tools/bin/lib/hawq_bash_functions.sh
new file mode 100755
index 0000000..f547c30
--- /dev/null
+++ b/tools/bin/lib/hawq_bash_functions.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+#Check that SHELL is /bin/bash (or the /bin/sh listing mentions bash); "$SHELL" is quoted so an empty value cannot break the test
+if [ "$SHELL" != /bin/bash ] && [ `ls -al /bin/sh|grep -c bash` -ne 1 ];then
+    echo "[FATAL]:-Scripts must be run by a user account that has SHELL=/bin/bash"
+    if [ -f /bin/bash ];then
+        echo "[INFO]:-/bin/bash exists, please update user account shell"
+    else
+        echo "[WARN]:-/bin/bash does not exist, does bash need to be installed?"
+    fi
+    exit 2    # hawqinit.sh sources this file, so this also ends the sourcing script
+fi
+
+declare -a CMDPATH
+CMDPATH=(/usr/kerberos/bin /usr/sfw/bin /opt/sfw/bin /usr/local/bin /bin /usr/bin /sbin /usr/sbin /usr/ucb /sw/bin)
+
+# findCmdInPath <command>: print the full path of <command> from the first CMDPATH directory holding it.
+# If it is missing, print the bare name, write a diagnostic to stderr and return 1.
+# (The diagnostic used to be handed to `return`, which only accepts a numeric status.)
+findCmdInPath() {
+        cmdtofind=$1
+        # On SunOS only the XPG4 awk is accepted.
+        if [ "$cmdtofind" = 'awk' ] && [ "`uname`" = SunOS ]; then
+            if [ -f "/usr/xpg4/bin/awk" ]; then
+                CMD=/usr/xpg4/bin/awk
+                echo $CMD
+                return
+            fi
+            echo "$cmdtofind"
+            echo "Problem in hawq_bash_functions, command '/usr/xpg4/bin/awk' not found. You will need to edit the script named hawq_bash_functions.sh to properly locate the needed commands for your platform." >&2
+            return 1
+        fi
+        for pathel in "${CMDPATH[@]}"
+                do
+                CMD=$pathel/$cmdtofind
+                if [ -f "$CMD" ]; then
+                        echo "$CMD"
+                        return
+                fi
+        done
+        echo "$cmdtofind"
+        echo "Problem in hawq_bash_functions, command '$cmdtofind' not found in COMMAND path. You will need to edit the script named hawq_bash_functions.sh to properly locate the needed commands for your platform." >&2
+        return 1
+}
+
+AWK=`findCmdInPath awk`    # each tool is resolved once through findCmdInPath, which echoes the bare name when nothing is found
+BASENAME=`findCmdInPath basename`
+CAT=`findCmdInPath cat`
+CLEAR=`findCmdInPath clear`
+CKSUM=`findCmdInPath cksum`
+CUT=`findCmdInPath cut`
+DATE=`findCmdInPath date`
+DD=`findCmdInPath dd`
+DIRNAME=`findCmdInPath dirname`
+DF=`findCmdInPath df`
+DU=`findCmdInPath du`
+ECHO=`findCmdInPath echo`
+EXPR=`findCmdInPath expr`
+FIND=`findCmdInPath find`
+TABECHO=$ECHO    # same command as $ECHO
+PROMPT="$ECHO"    # same command as $ECHO
+GREP=`findCmdInPath grep`
+GZIPCMD=`findCmdInPath gzip`
+EGREP=`findCmdInPath egrep`
+HEAD=`findCmdInPath head`
+HOSTNAME=`findCmdInPath hostname`
+IPCS=`findCmdInPath ipcs`
+IFCONFIG=`findCmdInPath ifconfig`
+KILL=`findCmdInPath kill`
+LESSCMD=`findCmdInPath less`
+LS=`findCmdInPath ls`
+LOCALE=`findCmdInPath locale`
+MV=`findCmdInPath mv`
+MORECMD=`findCmdInPath more`
+MKDIR=`findCmdInPath mkdir`
+MKFIFO=`findCmdInPath mkfifo`
+NETSTAT=`findCmdInPath netstat`
+PING=`findCmdInPath ping`
+PS=`findCmdInPath ps`
+PYTHON=${GPHOME}/ext/python/bin/python    # fixed path under $GPHOME rather than a CMDPATH lookup
+RM=`findCmdInPath rm`
+SCP=`findCmdInPath scp`
+SED=`findCmdInPath sed`
+SLEEP=`findCmdInPath sleep`
+SORT=`findCmdInPath sort`
+SPLIT=`findCmdInPath split`
+SSH=`findCmdInPath ssh`
+TAIL=`findCmdInPath tail`
+TAR=`findCmdInPath tar`
+TEE=`findCmdInPath tee`
+TOUCH=`findCmdInPath touch`
+TR=`findCmdInPath tr`
+WC=`findCmdInPath wc`
+WHICH=`findCmdInPath which`
+WHOAMI=`findCmdInPath whoami`
+ZCAT=`findCmdInPath zcat`
+
+CALL_HOST=`$HOSTNAME|$CUT -d. -f1`    # host name up to the first dot
+VERBOSE=${VERBOSE:-0}    # keep a value the sourcing script set earlier (hawqinit.sh derives it from $3 before sourcing)
+USER_NAME=`id|$AWK '{print $1}'|$CUT -d"(" -f2|$TR -d ')'`    # name inside the parentheses of id's first field
+PROG_NAME=`echo $0 | $TR -d '-'`
+PROG_NAME=`$BASENAME $PROG_NAME`
+PROG_PIDNAME=`echo $$ $PROG_NAME | awk '{printf "%06d %s\n", $1, $2}'`    # zero-padded pid followed by the program name
+LOG_FILE=/tmp/mylog    # default log target; hawqinit.sh reassigns LOG_FILE after sourcing
+#DEBUG_LEVEL=1
+
+# LOG_MSG <message> [verbose|VERBOSE|v|V]
+# Append a timestamped <message> to $LOG_FILE; also show it on stdout when VERBOSE is 1
+# or when the second argument asks for it. Sets the global EXIT_STATUS from the level tag
+# in front of the first "]": 1 for WARN, 2 for FATAL/ERROR, 0 otherwise.
+LOG_MSG () {
+        EXIT_STATUS=0
+        TIME=`$DATE +%H":"%M":"%S`
+        CUR_DATE=`$DATE +%Y%m%d`
+        # Per-call display flag: the global VERBOSE used to be switched on here instead, which
+        # made every later message verbose as well and left DISPLAY_TXT unused.
+        DISPLAY_TXT=0
+        # Only the text before the first "]" is inspected, without word-splitting the message.
+        case "${1%%\]*}" in
+            *FATAL*|*ERROR*) EXIT_STATUS=2 ;;
+            *WARN*)          EXIT_STATUS=1 ;;
+        esac
+        case "$2" in
+            verbose|VERBOSE|v|V) DISPLAY_TXT=1 ;;
+        esac
+        if [ "$VERBOSE" == "1" ] || [ "$DISPLAY_TXT" == "1" ]; then
+            $ECHO "${CUR_DATE}:${TIME}:${PROG_PIDNAME}:${CALL_HOST}:${USER_NAME}-$1" | $TEE -a $LOG_FILE
+        else
+            $ECHO "${CUR_DATE}:${TIME}:${PROG_PIDNAME}:${CALL_HOST}:${USER_NAME}-$1" >> $LOG_FILE
+        fi
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/d0c9c1de/tools/bin/lib/hawqinit.sh
----------------------------------------------------------------------
diff --git a/tools/bin/lib/hawqinit.sh b/tools/bin/lib/hawqinit.sh
index 6a7e1d8..312f3c1 100755
--- a/tools/bin/lib/hawqinit.sh
+++ b/tools/bin/lib/hawqinit.sh
@@ -2,22 +2,25 @@
 
 object_type=$1
 GPHOME=$2
-
-current_time=`date '+%s'`
-mgmt_config_file=${GPHOME}/etc/_mgmt_config
+VERBOSE=0
+if [ "$3" == "1" ]; then
+    VERBOSE=1
+fi
+source ${GPHOME}/bin/lib/hawq_bash_functions.sh
+SOURCE_PATH="source ${GPHOME}/greenplum_path.sh"
+${SOURCE_PATH}
 
 if [ -f /etc/redhat-release ]; then
-    os_version=`cat /etc/redhat-release | awk '{print substr($7,0,1)}'`
+    os_version=`${CAT} /etc/redhat-release | ${AWK} '{print substr($7,0,1)}'`
 else
     os_version='other'
 fi
 
-source ${GPHOME}/greenplum_path.sh
-
+mgmt_config_file=${GPHOME}/etc/_mgmt_config
 if [ -f ${mgmt_config_file} ]; then
     source ${mgmt_config_file} > /dev/null 2>&1
 else
-    echo "${mgmt_config_file} is not exist, exit"
+    ${ECHO} "${mgmt_config_file} is not exist, exit"
     exit 1
 fi
 
@@ -30,7 +33,7 @@ elif [ ${object_type} = "segment" ]; then
    hawq_port=${segment_port}
    tmp_dir_list=${hawq_segment_temp_directory//,/ }
 else
-   echo "Node object should be master/standby/segment"
+   ${ECHO} "hawq init object should be one of master/standby/segment"
    exit 1
 fi
 master_max_connections=${max_connections}
@@ -38,19 +41,19 @@ segment_max_connections=${max_connections}
 master_ip_address_all=""
 standby_ip_address_all=""
 if [ "${os_version}" = "7" ];then
-    master_ip_address_all=`ssh ${master_host_name} "/sbin/ifconfig |grep -v '127.0.0' | grep 'inet '|awk '{print \\$2}'"`
+    master_ip_address_all=`${SSH} ${master_host_name} "${IFCONFIG} |${GREP} -v '127.0.0' | ${GREP} 'inet '|${AWK} '{print \\$2}'"`
     if [ "${standby_host_name}" != "" ] && [ "${standby_host_name}" != "None" ] \
         && [ "${standby_host_name}" != "none" ] && [ "${standby_host_name}" != "NONE" ];then
-        standby_ip_address_all=`ssh ${standby_host_name} "/sbin/ifconfig |grep -v '127.0.0' | grep 'inet '|awk '{print \\$2}'"`
+        standby_ip_address_all=`${SSH} ${standby_host_name} "${IFCONFIG} |${GREP} -v '127.0.0' | ${GREP} 'inet '|${AWK} '{print \\$2}'"`
     fi
-    segment_ip_address_all=`/sbin/ifconfig | grep -v '127.0.0' | awk '/inet addr/{print substr($2,6)}'`
+    segment_ip_address_all=`${IFCONFIG} | ${GREP} -v '127.0.0' | ${AWK} '/inet addr/{print substr($2,6)}'`
 else
-    master_ip_address_all=`ssh ${master_host_name} "/sbin/ifconfig |grep -v '127.0.0' |awk '/inet addr/{print substr(\\$2,6)}'"`
+    master_ip_address_all=`${SSH} ${master_host_name} "${IFCONFIG} |${GREP} -v '127.0.0' |${AWK} '/inet addr/{print substr(\\$2,6)}'"`
     if [ "${standby_host_name}" != "" ] && [ "${standby_host_name}" != "None" ] \
         && [ "${standby_host_name}" != "none" ] && [ "${standby_host_name}" != "NONE" ];then
-        standby_ip_address_all=`ssh ${standby_host_name} "/sbin/ifconfig |grep -v '127.0.0' |awk '/inet addr/{print substr(\\$2,6)}'"`
+        standby_ip_address_all=`${SSH} ${standby_host_name} "${IFCONFIG} |${GREP} -v '127.0.0' |${AWK} '/inet addr/{print substr(\\$2,6)}'"`
     fi
-    segment_ip_address_all=`/sbin/ifconfig | grep -v '127.0.0' | awk '/inet addr/{print substr($2,6)}'`
+    segment_ip_address_all=`${IFCONFIG} | ${GREP} -v '127.0.0' | ${AWK} '/inet addr/{print substr($2,6)}'`
 fi
 
 PG_HBA=pg_hba.conf
@@ -59,13 +62,15 @@ TMP_PG_HBA=/tmp/pg_hba_conf_master.$$
 MASTER_LOG_FILE=${log_filename}
 STANDBY_LOG_FILE=${log_filename}
 SEGMENT_LOG_FILE=${log_filename}
+LOG_FILE=${log_filename}
 
 PSQL=${GPHOME}/bin/psql
+PG_CTL=${GPHOME}/bin/pg_ctl
 if [ "${log_dir}" = "None" ]; then
-log_dir=${HOME}/hawqAdminLogs
+    log_dir=${HOME}/hawqAdminLogs
 fi
 if [ ! -d ${log_dir} ]; then
-    mkdir -p ${log_dir}
+    ${MKDIR} -p ${log_dir}
 fi
 
 if [ ! -f ${log_filename} ]; then
@@ -73,103 +78,104 @@ if [ ! -f ${log_filename} ]; then
 fi
 
 GET_CIDRADDR () {
-    if [ `echo $1 | grep -c :` -gt 0 ]; then
-        echo $1/128
+    if [ `${ECHO} $1 | ${GREP} -c :` -gt 0 ]; then
+        ${ECHO} $1/128
     else
-        echo $1/32
+        ${ECHO} $1/32
     fi
 }
 
 LOAD_GP_TOOLKIT () {
-    CUR_DATE=`date +%Y%m%d`
-    FILE_TIME=`date +%H%M%S`
-    echo "[INFO]:-Loading hawq_toolkit..." >> ${MASTER_LOG_FILE}
+    CUR_DATE=`${DATE} +%Y%m%d`
+    FILE_TIME=`${DATE} +%H%M%S`
+    TOOLKIT_FILE=/tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME
+    LOG_MSG "[INFO]:-Loading hawq_toolkit..." verbose
     ROLNAME=`$PSQL -q -t -A -p ${hawq_port} -c "select rolname from pg_authid where oid=10" template1`
     if [ x"$ROLNAME" == x"" ];then
-        echo "[FATAL]:-Failed to retrieve rolname." | tee -a ${MASTER_LOG_FILE}
+        LOG_MSG "[FATAL]:-Failed to retrieve rolname." verbose
         exit 1
     fi
 
-    if [ -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME ]; then
-        rm -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME
+    if [ -f ${TOOLKIT_FILE} ]; then
+        ${RM} -f ${TOOLKIT_FILE}
     fi
 
     # We need SET SESSION AUTH here to load the toolkit
-    echo "SET SESSION AUTHORIZATION $ROLNAME;"  >> /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME 2>&1
+    ${ECHO} "SET SESSION AUTHORIZATION $ROLNAME;"  >> ${TOOLKIT_FILE} 2>&1
     RETVAL=$?
     if [ $RETVAL -ne 0 ];then
-        echo "[FATAL]:-Failed to create the hawq_toolkit sql file." | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "[FATAL]:-Failed to create the hawq_toolkit sql file." | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    cat $GPHOME/share/postgresql/gp_toolkit.sql >> /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME 2>&1
+    ${CAT} $GPHOME/share/postgresql/gp_toolkit.sql >> ${TOOLKIT_FILE} 2>&1
     RETVAL=$?
     if [ $RETVAL -ne 0 ];then
-        echo "[FATAL]:-Failed to create the hawq_toolkit sql file." | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "[FATAL]:-Failed to create the hawq_toolkit sql file." | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    $PSQL -q -p ${hawq_port} -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME template1
+    $PSQL -q -p ${hawq_port} -f ${TOOLKIT_FILE} template1
     RETVAL=$?
     if [ $RETVAL -ne 0 ];then
-        echo "[FATAL]:-Failed to create the hawq_toolkit schema." | tee -a ${MASTER_LOG_FILE} 
+        ${ECHO} "[FATAL]:-Failed to create the hawq_toolkit schema." | tee -a ${MASTER_LOG_FILE} 
         exit 1
     fi
 
-    $PSQL -q -p ${hawq_port} -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME postgres
+    $PSQL -q -p ${hawq_port} -f ${TOOLKIT_FILE} postgres
     RETVAL=$?
     if [ $RETVAL -ne 0 ];then
-        echo "[FATAL]:-Failed to create the hawq_toolkit schema." | tee -a ${MASTER_LOG_FILE} 
+        ${ECHO} "[FATAL]:-Failed to create the hawq_toolkit schema." | tee -a ${MASTER_LOG_FILE} 
         exit 1
     fi
 
-    rm -f /tmp/_gp_toolkit_tmp_${CUR_DATE}_$FILE_TIME
+    ${RM} -f ${TOOLKIT_FILE}
 
     return $RETVAL
 }
 
 update_master_pg_hba(){
     # Updatepg_hba.conf for master.
-    cat ${hawq_data_directory}/${PG_HBA} |grep '^#' > ${TMP_PG_HBA}
+    ${CAT} ${hawq_data_directory}/${PG_HBA} |${GREP} '^#' > ${TMP_PG_HBA}
     mv ${TMP_PG_HBA} ${hawq_data_directory}/${PG_HBA}
     # Setting local access"
-    echo "local    all         $USER         ident" >> ${hawq_data_directory}/${PG_HBA}
-    # echo "[INFO]:-Setting local host access"
-    echo "host     all         $USER         127.0.0.1/28    trust" >> ${hawq_data_directory}/${PG_HBA}
-    MASTER_IPV6_LOCAL_ADDRESS_ALL=(`/sbin/ip -6 address show |grep inet6|awk '{print $2}' |cut -d'/' -f1`)
-    MASTER_HBA_IP_ADDRESS=(`echo ${master_ip_address_all[@]} ${MASTER_IPV6_LOCAL_ADDRESS_ALL[@]} ${standby_ip_address_all[@]}|tr ' ' '\n'|sort -u|tr '\n' ' '`)
+    ${ECHO} "local    all         $USER         ident" >> ${hawq_data_directory}/${PG_HBA}
+    # ${ECHO} "[INFO]:-Setting local host access"
+    ${ECHO} "host     all         $USER         127.0.0.1/28    trust" >> ${hawq_data_directory}/${PG_HBA}
+    MASTER_IPV6_LOCAL_ADDRESS_ALL=(`/sbin/ip -6 address show |${GREP} inet6|${AWK} '{print $2}' |cut -d'/' -f1`)
+    MASTER_HBA_IP_ADDRESS=(`${ECHO} ${master_ip_address_all[@]} ${MASTER_IPV6_LOCAL_ADDRESS_ALL[@]} ${standby_ip_address_all[@]}|tr ' ' '\n'|sort -u|tr '\n' ' '`)
     for ip_address in ${MASTER_HBA_IP_ADDRESS[@]}; do
         CIDR_MASTER_IP=$(GET_CIDRADDR ${ip_address})
-        CHK_COUNT=`grep -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
+        CHK_COUNT=`${GREP} -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
         if [ "$CHK_COUNT" -eq "0" ];then
-            echo "host  all     ${USER}    ${CIDR_MASTER_IP}       trust" >> ${hawq_data_directory}/${PG_HBA}
+            ${ECHO} "host  all     ${USER}    ${CIDR_MASTER_IP}       trust" >> ${hawq_data_directory}/${PG_HBA}
         else
-            echo "${CIDR_MASTER_IP} already exist in ${hawq_data_directory}/${PG_HBA}"
+            ${ECHO} "${CIDR_MASTER_IP} already exist in ${hawq_data_directory}/${PG_HBA}"
         fi
     done
 }
 
 update_standby_pg_hba(){
     # Updatepg_hba.conf for standby master.
-    echo "host  all     all    0.0.0.0/0       trust" >> ${hawq_data_directory}/${PG_HBA}
+    ${ECHO} "host  all     all    0.0.0.0/0       trust" >> ${hawq_data_directory}/${PG_HBA}
 }
 
 update_segment_pg_hba(){
     # Updatepg_hba.conf for segment.
     # Setting local access"
-    MASTERS_HBA_IP_ADDRESSES=(`echo ${master_ip_address_all[@]} ${standby_ip_address_all[@]}|tr ' ' '\n'|sort -u|tr '\n' ' '`)
+    MASTERS_HBA_IP_ADDRESSES=(`${ECHO} ${master_ip_address_all[@]} ${standby_ip_address_all[@]}|tr ' ' '\n'|sort -u|tr '\n' ' '`)
     for ip_address in ${MASTERS_HBA_IP_ADDRESSES[@]}; do
         CIDR_MASTER_IP=$(GET_CIDRADDR ${ip_address})
-        CHK_COUNT=`grep -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
+        CHK_COUNT=`${GREP} -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
         if [ "$CHK_COUNT" -eq "0" ];then
-            echo "host  all     all    ${CIDR_MASTER_IP}       trust" >> ${hawq_data_directory}/${PG_HBA}
+            ${ECHO} "host  all     all    ${CIDR_MASTER_IP}       trust" >> ${hawq_data_directory}/${PG_HBA}
         fi
     done
     for ip_address in ${segment_ip_address_all[@]}; do
         CIDR_MASTER_IP=$(GET_CIDRADDR ${ip_address})
-        CHK_COUNT=`grep -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
+        CHK_COUNT=`${GREP} -c ${CIDR_MASTER_IP} ${hawq_data_directory}/${PG_HBA}`
         if [ "$CHK_COUNT" -eq "0" ];then
-            echo "host  all     ${USER} ${CIDR_MASTER_IP}       trust" >> ${hawq_data_directory}/${PG_HBA}
+            ${ECHO} "host  all     ${USER} ${CIDR_MASTER_IP}       trust" >> ${hawq_data_directory}/${PG_HBA}
         fi
     done
 }
@@ -181,145 +187,189 @@ master_init() {
         --shared_buffers=${shared_buffers} --backend_output=${log_dir}/master.initdb 1>>${MASTER_LOG_FILE} 2>&1
 
     if [ $? -ne 0 ] ; then
-        echo "Master postgres initdb failed" | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Master postgres initdb failed" | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
     update_master_pg_hba 1>>${MASTER_LOG_FILE} 2>&1
 
-    ${GPHOME}/bin/pg_ctl -D ${hawq_data_directory} -l ${hawq_data_directory}/pg_log/startup.log -w -t 60 -o " -p ${hawq_port}  --silent-mode=true -M master -i" start >> ${MASTER_LOG_FILE}
+    ${PG_CTL} -D ${hawq_data_directory} -l ${hawq_data_directory}/pg_log/startup.log -w -t 60 -o " -p ${hawq_port}  --silent-mode=true -M master -i" start >> ${MASTER_LOG_FILE}
     if [ $? -ne 0  ] ; then
-        echo "Start hawq master failed"  | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Start hawq master failed"  | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "create filespace dfs_system on hdfs ('${dfs_url}');" 1>>${MASTER_LOG_FILE} 2>&1
+    $PSQL -p ${hawq_port} -d template1 -c "create filespace dfs_system on hdfs ('${dfs_url}');" 1>>${MASTER_LOG_FILE} 2>&1
     if [ $? -ne 0  ] ; then
-        echo "Create filespace failed, please check your hdfs settings"  | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Create filespace failed, please check your hdfs settings"  | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "create tablespace dfs_default filespace dfs_system;" 1>>${MASTER_LOG_FILE} 2>&1
+    $PSQL -p ${hawq_port} -d template1 -c "create tablespace dfs_default filespace dfs_system;" 1>>${MASTER_LOG_FILE} 2>&1
     if [ $? -ne 0  ] ; then
-        echo "Create tablespace failed"  | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Create tablespace failed"  | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    env PGOPTIONS="-c gp_session_role=utility" ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c \
+    env PGOPTIONS="-c gp_session_role=utility" $PSQL -p ${hawq_port} -d template1 -c \
         "SET allow_system_table_mods='dml';UPDATE pg_database SET dat2tablespace = (SELECT oid FROM pg_tablespace WHERE spcname = 'dfs_default') WHERE datname = 'template1';" 1>>${MASTER_LOG_FILE} 2>&1
     if [ $? -ne 0  ] ; then
-        echo "Configure database template1 failed" | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Configure database template1 failed" | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "create database template0 tablespace dfs_default template template1;" 1>>${MASTER_LOG_FILE} 2>&1
+    $PSQL -p ${hawq_port} -d template1 -c "create database template0 tablespace dfs_default template template1;" 1>>${MASTER_LOG_FILE} 2>&1
     if [ $? -ne 0  ] ; then
-        echo "Create database template0 failed" | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Create database template0 failed" | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    env PGOPTIONS="-c gp_session_role=utility" ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "SET allow_system_table_mods='dml';UPDATE pg_database SET datistemplate = 't', datallowconn = false WHERE datname = 'template0';" 1>>${MASTER_LOG_FILE} 2>&1
+    env PGOPTIONS="-c gp_session_role=utility" $PSQL -p ${hawq_port} -d template1 -c "SET allow_system_table_mods='dml';UPDATE pg_database SET datistemplate = 't', datallowconn = false WHERE datname = 'template0';" 1>>${MASTER_LOG_FILE} 2>&1
     if [ $? -ne 0  ] ; then
-        echo "Configure database template0 failed" | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Configure database template0 failed" | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "create database postgres tablespace dfs_default;" 1>>${MASTER_LOG_FILE} 2>&1
+    $PSQL -p ${hawq_port} -d template1 -c "create database postgres tablespace dfs_default;" 1>>${MASTER_LOG_FILE} 2>&1
     if [ $? -ne 0  ] ; then
-        echo "Create database postgres failed" | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Create database postgres failed" | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    env PGOPTIONS="-c gp_session_role=utility" ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "SET allow_system_table_mods='dml';UPDATE pg_database SET datistemplate = 't' WHERE datname = 'postgres';" 1>>${MASTER_LOG_FILE} 2>&1
+    env PGOPTIONS="-c gp_session_role=utility" $PSQL -p ${hawq_port} -d template1 -c "SET allow_system_table_mods='dml';UPDATE pg_database SET datistemplate = 't' WHERE datname = 'postgres';" 1>>${MASTER_LOG_FILE} 2>&1
     if [ $? -ne 0  ] ; then
-        echo "Configure database postgres failed" | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Configure database postgres failed" | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
-    env PGOPTIONS="-c gp_session_role=utility" ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "CHECKPOINT;" 1>>${MASTER_LOG_FILE} 2>&1
+    env PGOPTIONS="-c gp_session_role=utility" $PSQL -p ${hawq_port} -d template1 -c "CHECKPOINT;" 1>>${MASTER_LOG_FILE} 2>&1
     if [ $? -ne 0  ] ; then
-        echo "CHECKPOINT failed" | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "CHECKPOINT failed" | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
 
     LOAD_GP_TOOLKIT
     if [ $? -ne 0  ] ; then
-        echo "Load TOOLKIT failed" | tee -a ${MASTER_LOG_FILE}
+        ${ECHO} "Load TOOLKIT failed" | tee -a ${MASTER_LOG_FILE}
         exit 1
     fi
-    ${GPHOME}/bin/psql -p ${hawq_port} -d template1 -c "alter user \"${USER}\" password 'gparray';" 1>>${MASTER_LOG_FILE} 2>&1
+    $PSQL -p ${hawq_port} -d template1 -c "alter user \"${USER}\" password 'gparray';" 1>>${MASTER_LOG_FILE} 2>&1
 
     if [ $? -ne 0  ] ; then
-        echo "Alter user failed" 1>> ${MASTER_LOG_FILE} 2>&1
+        ${ECHO} "Alter user failed" 1>> ${MASTER_LOG_FILE} 2>&1
         exit 1
     fi
 }
 
 standby_init() {
     # Make sure log file are created.
-    ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} \
-        "if [ ! -d ${log_dir} ]; then echo \"Try to create log directory for standby master.\"; mkdir -p ${log_dir}; fi" 
-    ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} \
-        "if [ ! -f ${STANDBY_LOG_FILE} ]; then touch ${STANDBY_LOG_FILE}; fi" 
-    STANDBY_IP_ADDRESSES=`ping -c1 -n ${standby_host_name} | head -n1 | sed 's/.*(\([0-9]*\.[0-9]*\.[0-9]*\.[0-9]*\)).*/\1/g'`
-    echo "Try to stop HAWQ cluster" 1>>${STANDBY_LOG_FILE}
-    source $GPHOME/greenplum_path.sh
-    # Stop hawq cluster before add new standby master.
-    hawq stop cluster -a >> ${STANDBY_LOG_FILE}
-    # Check if data directory are exist and keep clean.
-    ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} \
-        "if [ ! -d ${master_data_directory} ]; then echo \"Data directory ${master_data_directory} is not exist, please create it.\"; exit 1; fi" 
-    if [ $? -ne 0  ] ; then
-        echo "Standby master data directory check failed" | tee -a ${STANDBY_LOG_FILE}
+    if [ ! -f ${STANDBY_LOG_FILE} ]; then
+        touch ${STANDBY_LOG_FILE};
+    fi
+
+    LOG_MSG ""
+    LOG_MSG "[INFO]:-Stopping HAWQ cluster"
+    ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "${SOURCE_PATH}; hawq stop allsegments -a -M fast;" >> ${STANDBY_LOG_FILE} 2>&1
+    if [ $? -ne 0 ] ; then
+        LOG_MSG "[ERROR]:-Stop segments failed" verbose
         exit 1
+    else
+        LOG_MSG "[INFO]:-HAWQ segments stopped" verbose
     fi
-    ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} \
-        "if [ \"\$(ls -A ${hawq_data_directory})\" ] && [ \"${hawq_data_directory}\" != \"\" ]; then echo \"Data directory ${hawq_data_directory} is not empty, please clean it.\"; exit 1; fi" 
-    if [ $? -ne 0  ] ; then
-        echo "Standby master data directory check failed" | tee -a ${STANDBY_LOG_FILE}
+
+    ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "${SOURCE_PATH}; hawq stop master -a -M fast;" >> ${STANDBY_LOG_FILE} 2>&1
+    if [ $? -ne 0 ] ; then
+        LOG_MSG "[ERROR]:-Stop master failed" verbose
         exit 1
+    else
+        LOG_MSG "[INFO]:-HAWQ master stopped" verbose
     fi
-    pushd ${master_data_directory} >> ${STANDBY_LOG_FILE}
+
     # Sync data directories to standby master.
-    echo "Sync master files to standby from master" >> ${STANDBY_LOG_FILE}
-    tar cf - * --exclude="pg_log" --exclude="db_dumps" --exclude="gpperfmon/data"  \
-        | ssh ${standby_host_name} tar xf - -C ${master_data_directory} 1>>${STANDBY_LOG_FILE}
-    if [ $? -ne 0  ] ; then
-        echo "Sync master files to standby failed" | tee -a ${STANDBY_LOG_FILE}
+    LOG_MSG "[INFO]:-Sync files to standby from master"
+    ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "cd ${master_data_directory}; \
+         ${SOURCE_PATH}; ${GPHOME}/bin/lib/pysync.py -x gpperfmon/data -x pg_log -x db_dumps \
+         ${master_data_directory} ${standby_host_name}:${master_data_directory};" >> ${STANDBY_LOG_FILE} 2>&1
+    if [ $? -ne 0 ] ; then
+        LOG_MSG "[FATAL]:-Sync master files to standby failed" verbose
         exit 1
     fi
+
+    ${MKDIR} -p ${master_data_directory}/pg_log | tee -a ${STANDBY_LOG_FILE}
+
+    STANDBY_IP_ADDRESSES=`${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "${PING} -c1 -n ${standby_host_name} | head -n1 | sed 's/.*(\([0-9]*\.[0-9]*\.[0-9]*\.[0-9]*\)).*/\1/g';"`
+    if [ -z "${STANDBY_IP_ADDRESSES}" ] ; then
+        LOG_MSG "[FATAL]:-Standby ip address is empty" verbose
+        exit 1
+    else
+        LOG_MSG "[INFO]:-Standby ip address is ${STANDBY_IP_ADDRESSES}" verbose
+    fi
+
     
-    ssh -o 'StrictHostKeyChecking no' ${hawqUser}@${standby_host_name} "mkdir -p ${master_data_directory}/pg_log | tee -a ${STANDBY_LOG_FILE};"
-    hawq start standby -a >>  ${STANDBY_LOG_FILE}
-    hawq stop standby -a >>  ${STANDBY_LOG_FILE}
-    
-    hawq start master -a >> ${STANDBY_LOG_FILE}
-    env PGOPTIONS="-c gp_session_role=utility" psql -p ${master_port} -d template1 -c"select gp_remove_master_standby() where (select count(*) from gp_segment_configuration where role='s') = 1;" >> ${STANDBY_LOG_FILE} 2>&1
-    env PGOPTIONS="-c gp_session_role=utility" psql -p ${master_port} -d template1 -c \
-       "select gp_add_master_standby('${standby_host_name}','${STANDBY_IP_ADDRESSES}','');" 1>>${STANDBY_LOG_FILE} 2>&1
-    if [ $? -ne 0  ] ; then
-        echo "Register standby infomation failed" | tee -a ${STANDBY_LOG_FILE}
+    LOG_MSG "[INFO]:-Start hawq master" verbose
+    ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "${SOURCE_PATH}; hawq start master -a --masteronly >> ${STANDBY_LOG_FILE}"
+    if [ $? -ne 0 ] ; then
+        LOG_MSG "[ERROR]:-Start HAWQ master failed" verbose
+        exit 1
+    else
+        LOG_MSG "[INFO]:-HAWQ master started" verbose
+    fi
+
+    LOG_MSG "[INFO]:-Try to remove existing standby from catalog" verbose
+    ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "${SOURCE_PATH}; env PGOPTIONS=\"-c gp_session_role=utility\" $PSQL -p ${master_port} -d template1 \
+         -c\"select gp_remove_master_standby() where (select count(*) from gp_segment_configuration where role='s') = 1;\";" >> ${STANDBY_LOG_FILE} 2>&1
+
+    ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "${SOURCE_PATH}; env PGOPTIONS=\"-c gp_session_role=utility\" $PSQL -p ${master_port} -d template1 -c \
+         \"select gp_add_master_standby('${standby_host_name}','${STANDBY_IP_ADDRESSES}','');\";" >>${STANDBY_LOG_FILE} 2>&1
+    if [ $? -ne 0 ] ; then
+        LOG_MSG "[FATAL]:-Register standby infomation failed" verbose
+        exit 1
+    else
+        LOG_MSG "[INFO]:-Register standby to master successfully" verbose
+    fi
+
+    ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "${SOURCE_PATH}; hawq stop master -a -M fast;" >> ${STANDBY_LOG_FILE}
+    if [ $? -ne 0 ] ; then
+        LOG_MSG "[ERROR]:-Stop HAWQ master failed" verbose
         exit 1
+    else
+        LOG_MSG "[INFO]:-HAWQ master stopped" verbose
     fi
-    hawq stop master -a >> ${STANDBY_LOG_FILE}
     
-    hawq start cluster -a >> ${STANDBY_LOG_FILE}
-    env PGOPTIONS="-c gp_session_role=utility" psql -p ${master_port} -d template1 -c"select * from gp_segment_configuration;" 1>>${STANDBY_LOG_FILE} 2>&1
-    popd >> ${STANDBY_LOG_FILE}
+    ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "${SOURCE_PATH}; hawq start cluster -a;" >> ${STANDBY_LOG_FILE}
+    if [ $? -ne 0 ] ; then
+        LOG_MSG "[ERROR]:-Start HAWQ cluster failed" verbose
+        exit 1
+    else
+        LOG_MSG "[INFO]:-HAWQ cluster started" verbose
+    fi
+
+    ${SSH} -o 'StrictHostKeyChecking no' ${hawqUser}@${master_host_name} \
+        "${SOURCE_PATH}; env PGOPTIONS=\"-c gp_session_role=utility\" $PSQL -p ${master_port} -d template1 \
+         -c\"select * from gp_segment_configuration;\";" >>${STANDBY_LOG_FILE} 2>&1
 
 }
 
 segment_init() {
     source ${GPHOME}/greenplum_path.sh
-    for tmp_path in `echo ${hawqSegmentTemp} | sed 's|,| |g'`; do
+    for tmp_path in `${ECHO} ${hawqSegmentTemp} | sed 's|,| |g'`; do
         if [ ! -d ${tmp_path} ]; then
-            echo "Temp directory is not exist, please create it" | tee -a ${SEGMENT_LOG_FILE}
-            echo "Segment init failed on ${HOSTNAME}"
+            ${ECHO} "Temp directory is not exist, please create it" | tee -a ${SEGMENT_LOG_FILE}
+            ${ECHO} "Segment init failed on ${HOSTNAME}"
             exit 1
         else
            if [ ! -w "${tmp_path}" ]; then 
-               echo "Do not have write permission to temp directory, please check" | tee -a ${SEGMENT_LOG_FILE}
-               echo "Segment init failed on ${HOSTNAME}"
+               ${ECHO} "Do not have write permission to temp directory, please check" | tee -a ${SEGMENT_LOG_FILE}
+               ${ECHO} "Segment init failed on ${HOSTNAME}"
                exit 1
            fi
         fi
@@ -332,18 +382,18 @@ segment_init() {
          --shared_buffers=${shared_buffers} --backend_output=${log_dir}/segment.initdb 1>>${SEGMENT_LOG_FILE} 2>&1
 
     if [ $? -ne 0 ] ; then
-        echo "Postgres initdb failed" | tee -a ${SEGMENT_LOG_FILE}
-        echo "Segment init failed on ${HOSTNAME}"
+        ${ECHO} "Postgres initdb failed" | tee -a ${SEGMENT_LOG_FILE}
+        ${ECHO} "Segment init failed on ${HOSTNAME}"
         exit 1
     fi
 
     update_segment_pg_hba 1>>${SEGMENT_LOG_FILE} 2>&1
 
-    ${GPHOME}/bin/pg_ctl -D ${hawq_data_directory} -l ${hawq_data_directory}/pg_log/startup.log -w -t 60 -o \
+    ${PG_CTL} -D ${hawq_data_directory} -l ${hawq_data_directory}/pg_log/startup.log -w -t 60 -o \
          " -p ${hawq_port} --silent-mode=true -M segment -i" start >> ${SEGMENT_LOG_FILE}
 
     if [ $? -ne 0  ] ; then
-        echo "Segment init failed on ${HOSTNAME}" | tee -a ${SEGMENT_LOG_FILE}
+        ${ECHO} "Segment init failed on ${HOSTNAME}" | tee -a ${SEGMENT_LOG_FILE}
         exit 1
     fi
     }
@@ -353,18 +403,18 @@ check_data_directorytory() {
     default_mdd=~/hawq-data-directory/masterdd
     default_sdd=~/hawq-data-directory/segmentdd
     if [ "${hawq_data_directory}" = "${default_mdd}" ]; then
-        mkdir -p ${default_mdd}
+        ${MKDIR} -p ${default_mdd}
     elif [ "${hawq_data_directory}" = "${default_sdd}" ]; then
-        mkdir -p ${default_sdd}
+        ${MKDIR} -p ${default_sdd}
     fi
     # Check if data directory already exist and clean.
     if [ -d ${hawq_data_directory} ]; then
         if [ "$(ls -A ${hawq_data_directory})" ] && [ "${hawq_data_directory}" != "" ]; then
-             echo "Data directory ${hawq_data_directory} is not empty on ${HOSTNAME}"
+             ${ECHO} "Data directory ${hawq_data_directory} is not empty on ${HOSTNAME}"
              exit 1
         fi
     else
-        echo "Data directory ${hawq_data_directory} does not exist, please create it"
+        ${ECHO} "Data directory ${hawq_data_directory} does not exist, please create it"
         exit 1
     fi
 }
@@ -373,11 +423,11 @@ check_temp_directory() {
     # Check if temp directory exist.
     for tmp_dir in ${tmp_dir_list}; do
         if [ ! -d ${tmp_dir} ]; then
-            echo "Temporary directory ${tmp_dir} does not exist, please create it"
+            ${ECHO} "Temporary directory ${tmp_dir} does not exist, please create it"
             exit 1
         fi
         if [ ! -w ${tmp_dir} ]; then
-            echo "Temporary directory ${tmp_dir} is not writable, exit." ;
+            ${ECHO} "Temporary directory ${tmp_dir} is not writable, exit." ;
             exit 1
         fi
     done
@@ -389,13 +439,14 @@ if [ ${object_type} == "master" ]; then
     check_temp_directory
     master_init
 elif [ ${object_type} == "standby" ]; then
+    check_data_directorytory
     standby_init
 elif [ ${object_type} == "segment" ]; then
     check_data_directorytory
     check_temp_directory
     segment_init
 else
-    echo "Please input correct node type"
+    ${ECHO} "Please input correct node object"
     exit 1
 fi
 exit 0


[2/2] incubator-hawq git commit: HAWQ-40. Refact hawq standby init. Updated GUC name.

Posted by rl...@apache.org.
HAWQ-40. Refact hawq standby init. Updated GUC name.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/2561ab74
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/2561ab74
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/2561ab74

Branch: refs/heads/master
Commit: 2561ab74d9bcbe45c33d148d18ba0a7204cb80a4
Parents: d0c9c1d
Author: rlei <rl...@pivotal.io>
Authored: Fri Nov 13 11:45:25 2015 +0800
Committer: stanlyxiang <st...@gmail.com>
Committed: Fri Nov 13 11:55:27 2015 +0800

----------------------------------------------------------------------
 tools/bin/hawq_ctl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2561ab74/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index 4890cd4..334e78a 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -148,12 +148,12 @@ class HawqInit:
         return result
 
     def set_vsegment_num_per_node(self):
-        cmd = "%s; hawq config -c hawq_resourcemanager_query_vsegment_number_per_segment_limit  \
+        cmd = "%s; hawq config -c hawq_rm_nvseg_perquery_perseg_limit \
               -v %s --skipvalidation -q > /dev/null" % \
               (source_hawq_env, self.vseg_num_per_node)
         result = local_ssh(cmd, logger)
         if result != 0:
-            logger.warn("Set hawq_resourcemanager_query_vsegment_number_per_segment_limit failed")
+            logger.warn("Set hawq_rm_nvseg_perquery_perseg_limit failed")
         return result
 
     def _get_master_init_cmd(self):
@@ -307,7 +307,7 @@ class HawqInit:
         else:
             logger.info("Segments init successfully on nodes '%s'" % self.host_list)
 
-        if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
+        if self.standby_host_name.lower() not in ('', 'none'):
             check_return_code(self._init_standby(), logger, \
                               "Init standby failed, exit", \
                               "Init standby successfully")
@@ -329,7 +329,7 @@ class HawqInit:
             logger.info("Try to remove standby master")
             self.hawq_remove_standby()
         elif self.node_type == "standby":
-            if self.standby_host_name in ('', 'None', 'none', 'NONE'):
+            if self.standby_host_name.lower() in ('', 'none'):
                 logger.info("No standby host found")
                 logger.info("Please check your standby host name")
                 sys.exit(1)
@@ -452,7 +452,7 @@ class HawqStart:
     def _start_all_nodes(self):
         logger.info("Start all the nodes in hawq cluster")
 
-        if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
+        if self.standby_host_name.lower() not in ('', 'none'):
             logger.info("Starting standby master '%s'" % self.standby_host_name)
             check_return_code(self.start_standby(), logger, "Standby master start failed, exit", 
                               "Standby master started successfully")
@@ -630,7 +630,7 @@ class HawqStop:
             logger.error("Master stop failed")
         else:
             logger.info("Master stopped successfully")
-        if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
+        if self.standby_host_name.lower() not in ('', 'none'):
             result = self._stop_standby()
             if result != 0:
                 logger.error("Standby master stop failed")