You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by rl...@apache.org on 2015/11/02 03:57:26 UTC

incubator-hawq git commit: HAWQ-109. Fix exit code of hawq command line tools.

Repository: incubator-hawq
Updated Branches:
  refs/heads/master 85884fc0d -> 9303e67d6


HAWQ-109. Fix exit code of hawq command line tools.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/9303e67d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/9303e67d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/9303e67d

Branch: refs/heads/master
Commit: 9303e67d68c374dddee8cebcadfd4265d928a8ad
Parents: 85884fc
Author: rlei <rl...@pivotal.io>
Authored: Mon Nov 2 10:56:05 2015 +0800
Committer: rlei <rl...@pivotal.io>
Committed: Mon Nov 2 10:57:15 2015 +0800

----------------------------------------------------------------------
 tools/bin/hawq                 |  40 +++----
 tools/bin/hawq_ctl             | 222 ++++++++++++------------------------
 tools/bin/hawqpylib/hawqlib.py |  14 ++-
 3 files changed, 104 insertions(+), 172 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/9303e67d/tools/bin/hawq
----------------------------------------------------------------------
diff --git a/tools/bin/hawq b/tools/bin/hawq
index 61920d6..f9ca034 100755
--- a/tools/bin/hawq
+++ b/tools/bin/hawq
@@ -6,7 +6,7 @@ import os
 import sys
 import subprocess
 from hawqpylib.HAWQ_HELP import *
-from hawqpylib.hawqlib import local_ssh
+from hawqpylib.hawqlib import check_return_code
 
 global hawq_home, source_hawq_env
 
@@ -21,11 +21,11 @@ def local_ssh(cmd):
     result = subprocess.Popen(cmd, shell=True).wait()
     return result
 
-
 def main():
     cluster_type_list = ('cluster', 'segment', 'master', 'standby', 'allsegments')
     cluster_init_list = ('cluster', 'segment', 'master', 'standby')
     hawq_home = os.getenv('GPHOME')
+    result = 0
 
     if not hawq_home:
         print "HAWQ home directory not defined, please check GPHOME settings."
@@ -51,71 +51,69 @@ def main():
             print START_HELP
             sys.exit(1)
         cmd = "%s; hawq_ctl %s %s" % (source_hawq_env, hawq_command, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "stop":
         if second_arg not in cluster_type_list:
             print STOP_HELP
             sys.exit(1)
         cmd = "%s; hawq_ctl %s %s" % (source_hawq_env, hawq_command, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "init":
         if second_arg not in cluster_init_list:
             print INIT_HELP
             sys.exit(1)
         cmd = "%s; hawq_ctl %s %s" % (source_hawq_env, hawq_command, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "restart":
         if second_arg not in cluster_type_list:
             print START_HELP
             sys.exit(1)
         cmd = "%s; hawq_ctl stop %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
         cmd = "%s; hawq_ctl start %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "activate":
         if second_arg in ['', 'help', '--help']:
             print ACTIVE_HELP
             sys.exit(1)
         cmd = "%s; hawq_ctl %s %s" % (source_hawq_env, hawq_command, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "config":
         if second_arg in ['', 'help', '--help']:
             print CONFIG_HELP
             sys.exit(1)
         cmd = "%s; hawqconfig %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
-    elif hawq_command == "create":
-        print "In construction..."
-        #print "hawq %s %s" % (hawq_command, sub_args)
+        result = local_ssh(cmd)
     elif hawq_command == "extract":
         cmd = "%s; hawqextract %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "check":
         cmd = "%s; gpcheck %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "load":
         cmd = "%s; gpload %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "filespace":
         cmd = "%s; hawqfilespace %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "state":
         cmd = "%s; hawqstate %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "ssh":
         cmd = "%s; gpssh %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "ssh-exkeys":
         cmd = "%s; gpssh-exkeys %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "scp":
         cmd = "%s; gpscp %s" % (source_hawq_env, sub_args)
-        local_ssh(cmd)
+        result = local_ssh(cmd)
     elif hawq_command == "version" or hawq_command == "--version":
         print_version()
     else:
         print COMMON_HELP
-        sys.exit()
+        sys.exit(1)
+    check_return_code(result)
 
 if __name__ == '__main__':
     main()

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/9303e67d/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index 11c9563..b01f97c 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -19,7 +19,7 @@ try:
     from gppylib.commands import gp
     from gppylib import userinput
     from hawqpylib.hawqlib import local_ssh, HawqCommands, HawqXMLParser, parse_hosts_file,\
-        remove_property_xml, sync_hawq_site
+        remove_property_xml, sync_hawq_site, check_return_code
     from hawqpylib.HAWQ_HELP import *
     from gppylib.db import dbconn
     from pygresql.pg import DatabaseError
@@ -133,11 +133,7 @@ class HawqInit:
               (self.GPHOME, self.dfs_url, self.enable_secure_filesystem, self.krb_server_keyfile)
         logger.info("Check if hdfs path is available.")
         logger.debug("Check hdfs: %s" % cmd)
-        result = local_ssh(cmd)
-        if result != 0:
-            logger.error("Check hdfs failed, please verify your hdfs settings.")
-            sys.exit(1)
-        return result
+        check_return_code(local_ssh(cmd), logger, "Check hdfs failed, please verify your hdfs settings.")
 
     def set_total_vsegment_num(self):
         cmd = "%s; hawq config -c default_segment_num -v %s --skipvalidation -q > /dev/null" % \
@@ -189,16 +185,10 @@ class HawqInit:
             signal.signal(signal.SIGINT,signal.SIG_IGN)
             cmd = "%s; hawq stop cluster -a -q" % source_hawq_env
             logger.info("Stop HAWQ cluster.")
-            result = local_ssh(cmd)
-            if result != 0:
-                logger.error("Stop HAWQ cluster failed, exit.")
-                sys.exit(1)
+            check_return_code(local_ssh(cmd), logger, "Stop HAWQ cluster failed, exit.")
             logger.info("Start HAWQ master.")
             cmd = "%s; hawq start master -m -q" % source_hawq_env
-            result = local_ssh(cmd)
-            if result != 0:
-                logger.error("Start HAWQ master failed, exit.")
-                sys.exit(1)
+            check_return_code(local_ssh(cmd), logger, "Start HAWQ master failed, exit.")
 
             try:
                 logger.info('Remove standby from Database catalog.')
@@ -215,17 +205,11 @@ class HawqInit:
                 logger.info('Database catalog updated successfully.')
                 logger.info("Stop HAWQ master.")
                 cmd = "%s; hawq stop master -a" % source_hawq_env
-                result = local_ssh(cmd)
-                if result != 0:
-                    logger.error("Stop hawq master failed, exit.")
-                    sys.exit(1)
+                check_return_code(local_ssh(cmd), logger, "Stop hawq master failed, exit.")
             except DatabaseError, ex:
                 logger.error("Failed to connect to database, this script can only be run when the database is up.")
                 cmd = "%s; hawq stop master -a" % source_hawq_env
-                result = local_ssh(cmd)
-                if result != 0:
-                    logger.error("Stop hawq master failed, exit.")
-                sys.exit(1)
+                check_return_code(local_ssh(cmd), logger, "Stop hawq master failed, exit.")
             remove_property_xml("hawq_standby_address_host", "%s/etc/hawq-site.xml" % self.GPHOME)
             host_list = parse_hosts_file(self.GPHOME)
             sync_hawq_site(self.GPHOME, host_list)
@@ -241,10 +225,8 @@ class HawqInit:
 
             logger.debug("rm -rf %s %s" % (self.master_data_directory, tmp_dir_list))
             cmd = "rm -rf %s %s" % (self.master_data_directory, tmp_dir_list)
-            result = remote_ssh(cmd, self.standby_host_name, self.user)
-            if result != 0:
-                logger.error("Delete standby master's directories failed, exit.")
-                sys.exit(1)
+            check_return_code(remote_ssh(cmd, self.standby_host_name, self.user), logger, \
+                              "Delete standby master's directories failed, exit.")
             signal.signal(signal.SIGINT,signal.default_int_handler)
             logger.info('Remove standby master finished.')
         else:
@@ -253,20 +235,15 @@ class HawqInit:
     def _resync_standby(self):
         logger.info("Re-sync standby.")
         cmd = "%s; hawq stop cluster -a" % source_hawq_env
-        result = local_ssh(cmd)
-        if result != 0:
-            logger.error("Stop hawq cluster failed, exit.")
-            sys.exit(1)
+        check_return_code(local_ssh(cmd), logger, "Stop hawq cluster failed, exit.")
         cmd = "cd %s; %s; %s/bin/lib/pysync.py -x gpperfmon/data -x pg_log -x db_dumps %s %s:%s" % \
                  (self.master_data_directory, source_hawq_env,  self.GPHOME, self.master_data_directory,
                   self.standby_host_name, self.master_data_directory)
         result = local_ssh(cmd)
-        if result != 0:
-            logger.error("Re-sync standby master failed, exit.")
-            sys.exit(1)
-        else:
-            cmd = "%s; hawq start cluster -a" % source_hawq_env
-            local_ssh(cmd)
+        check_return_code(result, logger, "Re-sync standby master failed, exit.")
+        cmd = "%s; hawq start cluster -a" % source_hawq_env
+        result = local_ssh(cmd)
+        check_return_code(result, logger, "Start hawq cluster failed.")
 
         return result
         
@@ -278,20 +255,13 @@ class HawqInit:
     def _init_cluster(self):
         logger.info("%s segment hosts defined" % self.hosts_count_number)
         logger.info("Set default_segment_num as: %s" % self.total_vseg_num)
-        result = self.set_total_vsegment_num()
-        if result != 0:
-            sys.exit(1)
-        result = self.set_vsegment_num_per_node()
-        if result != 0:
-            sys.exit(1)
+        check_return_code(self.set_total_vsegment_num())
+        check_return_code(self.set_vsegment_num_per_node())
+
         master_cmd = self._get_master_init_cmd()
         logger.info("Start to init master node: '%s'" % self.master_host_name)
-        result = local_ssh(master_cmd)
-        if result != 0:
-            logger.error("Master init failed, exit")
-            sys.exit(1)
-        else:
-            logger.info("Master init successfully")
+        check_return_code(local_ssh(master_cmd), logger, "Master init failed, exit", \
+                          "Master init successfully")
 
         segment_cmd_str = self._get_segment_init_cmd()
         # Execute segment init command on each segment nodes.
@@ -323,13 +293,9 @@ class HawqInit:
             scpcmd = "scp %s/etc/slaves %s:%s/etc/slaves > /dev/null" % \
                      (self.GPHOME, self.standby_host_name, self.GPHOME)
             local_ssh(scpcmd)
-            result = remote_ssh(standby_init_cmd, self.master_host_name, self.user)
-            if result != 0:
-                logger.error("Standby master init failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Standby master init successfully")
-                logger.info("HAWQ cluster init successfully")
+            check_return_code(remote_ssh(standby_init_cmd, self.master_host_name, self.user), logger, \
+                              "Standby master init failed, exit", "Standby master init successfully")
+            logger.info("HAWQ cluster init successfully")
         return None
 
     def run(self):
@@ -337,20 +303,12 @@ class HawqInit:
             self.check_hdfs_path()
             logger.info("%s segment hosts defined" % self.hosts_count_number)
             logger.info("Set default_segment_num as: %s" % self.total_vseg_num)
-            result = self.set_total_vsegment_num()
-            if result != 0:
-                sys.exit(1)
-            result = self.set_vsegment_num_per_node()
-            if result != 0:
-                sys.exit(1)
+            check_return_code(self.set_total_vsegment_num())
+            check_return_code(self.set_vsegment_num_per_node())
             logger.info("Start to init master")
             cmd = self._get_master_init_cmd()
-            result = local_ssh(cmd)
-            if result != 0:
-                logger.error("Master init failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Master init successfully")
+            check_return_code(local_ssh(cmd), logger, \
+                              "Master init failed, exit", "Master init successfully")
         elif self.node_type == "standby" and self.remove_standby is True:
             logger.info("Try to remove standby master")
             self.hawq_remove_standby()
@@ -358,29 +316,18 @@ class HawqInit:
             logger.info("Start to init standby master")
             logger.info("This might take couple minutes, please wait...")
             if self.no_update:
-                result = self._resync_standby()
-                if result != 0:
-                    logger.error("Standby master re-sync failed, exit")
-                    sys.exit(1)
-                else:
-                    logger.info("Standby master re-sync successfully")
+                check_return_code(self._resync_standby(), logger, \
+                                  "Standby master re-sync failed, exit", \
+                                  "Standby master re-sync successfully")
             else:
                 cmd = self._get_standby_init_cmd()
-                result = local_ssh(cmd)
-                if result != 0:
-                    logger.error("Standby master init failed, exit")
-                    sys.exit(1)
-                else:
-                    logger.info("Standby master init successfully")
+                check_return_code(local_ssh(cmd), logger, "Standby master init failed, exit", \
+                                  "Standby master init successfully")
 
         elif self.node_type == "segment":
             cmd = self._get_segment_init_cmd()
-            result = local_ssh(cmd)
-            if result != 0:
-                logger.error("Segment init failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Segment init successfully")
+            check_return_code(local_ssh(cmd), logger, "Segment init failed, exit", \
+                              "Segment init successfully")
 
         elif self.node_type == "cluster":
             self.check_hdfs_path()
@@ -487,21 +434,13 @@ class HawqStart:
     def _start_all_nodes(self):
         logger.info("Start all the nodes in hawq cluster")
         logger.info("Starting master node '%s'" % self.master_host_name)
-        result = self.start_master()
-        if result != 0:
-            logger.error("Master start failed, exit")
-            sys.exit(1)
-        else:
-            logger.info("Master started successfully")
+        check_return_code(self.start_master(), logger, "Master start failed, exit", \
+                          "Master started successfully")
 
         if self.standby_host_name not in ('', 'None', 'none', 'NONE'):
             logger.info("Starting standby master '%s'" % self.standby_host_name)
-            result = self.start_standby()
-            if result != 0:
-                logger.error("Standby master start failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Standby master started successfully")
+            check_return_code(self.start_standby(), logger, "Standby master start failed, exit", 
+                              "Standby master started successfully")
         segment_cmd_str = self._start_segment_cmd()
         logger.info("Start segments in list: %s" % self.host_list)
         work_list = []
@@ -550,34 +489,22 @@ class HawqStart:
 
     def run(self):
         if self.node_type == "master":
-            result = self.start_master()
-            if result != 0:
-                logger.error("Master start failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Master started successfully")
+            check_return_code(self.start_master(), logger, \
+                              "Master start failed, exit", "Master started successfully")
         elif self.node_type == "standby":
             if self.standby_host_name == '':
                 sys.exit(1)
-            result = self.start_standby()
-            if result != 0:
-                logger.error("Standby master start failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Standby master started successfully")
+            check_return_code(self.start_standby(), logger, \
+                              "Standby master start failed, exit", "Standby master started successfully")
         elif self.node_type == "segment":
-            result = self.start_segment()
-            if result != 0:
-                logger.error("Segment start failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Segment started successfully")
+            check_return_code(self.start_segment(), logger, \
+                              "Segment start failed, exit", "Segment started successfully")
         elif self.node_type == "cluster":
-            self._start_all_nodes()
+            check_return_code(self._start_all_nodes())
         elif self.node_type == "allsegments":
-            self._start_all_segments()
+            check_return_code(self._start_all_segments())
         else:
-            sys.exit('Node object should be in master/standby/segment/allsegments/cluster')
+            sys.exit('Node object should be in [master, standby, segment, allsegments, cluster]')
         return None
 
 
@@ -728,32 +655,20 @@ class HawqStop:
 
     def run(self):
         if self.node_type == "master":
-            result = self._stop_master()
-            if result != 0:
-                logger.error("Master stop failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Master stopped successfully")
+            check_return_code(self._stop_master(), logger, \
+                              "Master stop failed, exit", "Master stopped successfully")
         elif self.node_type == "standby":
-            result = self._stop_standby()
-            if result != 0:
-                logger.error("Standby master stop failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Standby master stopped successfully")
+            check_return_code(self._stop_standby(), logger, \
+                              "Standby master stop failed, exit", "Standby master stopped successfully")
         elif self.node_type == "segment":
-            result = self._stop_segment()
-            if result != 0:
-                logger.error("Segment stop failed, exit")
-                sys.exit(1)
-            else:
-                logger.info("Segment stopped successfully")
+            check_return_code(self._stop_segment(), logger, \
+                              "Segment stop failed, exit", "Segment stopped successfully")
         elif self.node_type == "cluster":
-            self._stopAll()
+            check_return_code(self._stopAll())
         elif self.node_type == "allsegments":
-            self._stopAllSegments()
+            check_return_code(self._stopAllSegments())
         else:
-            sys.exit('node_type should be in master/standby/segment/allsegments/cluster')
+            sys.exit('Node object should be in [master, standby, segment, allsegments, cluster]')
         return None
 
 
@@ -886,24 +801,31 @@ def hawq_activate_standby(opts, hawq_dict):
     if result != 0:
         logger.debug("Stop cluster failed, try to stop it immediately.")
         cmd = "%s; hawq stop cluster -a -M immediate;" % source_hawq_env
-        local_ssh(cmd)
-    cmd = "%s; hawq config -c hawq_master_address_host -v %s --skipvalidation -q" % (source_hawq_env, hawq_dict['hawq_standby_address_host'])
-    local_ssh(cmd)
-    cmd = "%s; hawq config -c hawq_standby_address_host -v %s --skipvalidation -q" % (source_hawq_env, 'none')
-    local_ssh(cmd)
+        check_return_code(local_ssh(cmd), logger, "Stop cluster failed, exit.")
+
+    cmd = "%s; hawq config -c hawq_master_address_host -v %s --skipvalidation -q" % \
+           (source_hawq_env, hawq_dict['hawq_standby_address_host'])
+    check_return_code(local_ssh(cmd), logger, "Set hawq_master_address_host failed.")
+
+    cmd = "%s; hawq config -c hawq_standby_address_host -v %s --skipvalidation -q" % \
+           (source_hawq_env, 'none')
+    check_return_code(local_ssh(cmd), logger, "Set hawq_standby_address_host failed.")
+
     cmd = '''echo "gp_persistent_repair_global_sequence = true" >> %s/%s''' % (hawq_dict['hawq_master_directory'], 'postgresql.conf')
-    local_ssh(cmd)
+    check_return_code(local_ssh(cmd), logger, "Set gp_persistent_repair_global_sequence = true failed.")
+
     cmd = "%s; hawq start master" % source_hawq_env
-    local_ssh(cmd)
+    check_return_code(local_ssh(cmd), logger, "Start master failed.")
+
     cmd = "%s; env PGOPTIONS=\"-c gp_session_role=utility\" psql -p %s -d template1 -c \"select gp_remove_master_standby()\
             where (select count(*) from gp_segment_configuration where role='s') = 1;\"" % (source_hawq_env, hawq_dict['hawq_master_address_port'])
-    local_ssh(cmd)
+    result = local_ssh(cmd)
     cmd = "%s; hawq stop master -a" % source_hawq_env
-    local_ssh(cmd)
+    check_return_code(local_ssh(cmd), logger, "Stop master failed.")
     cmd = "%s; hawq start cluster" % source_hawq_env
-    local_ssh(cmd)
+    check_return_code(local_ssh(cmd), logger, "Start cluster failed.")
     cmd = '''sed -i "/gp_persistent_repair_global_sequence/d" %s/%s''' % (hawq_dict['hawq_master_directory'], 'postgresql.conf')
-    local_ssh(cmd)
+    check_return_code(local_ssh(cmd))
     return None
 
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/9303e67d/tools/bin/hawqpylib/hawqlib.py
----------------------------------------------------------------------
diff --git a/tools/bin/hawqpylib/hawqlib.py b/tools/bin/hawqpylib/hawqlib.py
index 16e3c5e..9d7e25f 100755
--- a/tools/bin/hawqpylib/hawqlib.py
+++ b/tools/bin/hawqpylib/hawqlib.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-import os
+import os, sys
 import subprocess
 import threading
 from xml.dom import minidom
@@ -95,6 +95,18 @@ def remote_ssh(cmd, host, user):
     result = subprocess.Popen(remote_cmd_str, shell=True).wait()
     return result
 
+def check_return_code(result, logger = None,  error_msg = None, info_msg = None, exit_true = False):
+    '''Check shell command exit code.'''
+    if result != 0:
+        if error_msg and logger:
+            logger.error(error_msg)
+        sys.exit(1)
+    else:
+        if info_msg and logger:
+            logger.info(info_msg)
+        if exit_true:
+            sys.exit(0)
+    return result
 
 def parse_hosts_file(GPHOME):
     host_file = "%s/etc/slaves" % GPHOME