You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sw...@apache.org on 2017/02/15 01:53:22 UTC

ambari git commit: AMBARI-19995. Improve logging for HSI's llapstatus command polling. (Siddharth Seth via Swapan Shridhar)

Repository: ambari
Updated Branches:
  refs/heads/trunk c058866e1 -> 5b53d8481


AMBARI-19995. Improve logging for HSI's llapstatus command polling. (Siddharth Seth via Swapan Shridhar)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/5b53d848
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/5b53d848
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/5b53d848

Branch: refs/heads/trunk
Commit: 5b53d848191679e070a06f6ef4d327c155b79a0d
Parents: c058866
Author: Swapan Shridhar <ss...@hortonworks.com>
Authored: Tue Feb 14 17:45:24 2017 -0800
Committer: Swapan Shridhar <ss...@hortonworks.com>
Committed: Tue Feb 14 17:52:46 2017 -0800

----------------------------------------------------------------------
 .../package/scripts/hive_server_interactive.py  | 37 +++++++++++++-------
 .../HIVE/configuration/llap-cli-log4j2.xml      | 22 ++++++++++--
 2 files changed, 43 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/5b53d848/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py
index 2ab48ae..46864c0 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py
@@ -253,7 +253,6 @@ class HiveServerInteractiveDefault(HiveServerInteractive):
     """
     def _llap_start(self, env, cleanup=False):
       import params
-      Logger.info("Entered _llap_start()")
       env.set_params(params)
 
       if params.hive_server_interactive_ha:
@@ -328,8 +327,8 @@ class HiveServerInteractiveDefault(HiveServerInteractive):
 
       run_file_path = None
       try:
-        Logger.info(format("Command: {cmd}"))
-        code, output, error = shell.checked_call(cmd, user=params.hive_user, stderr=subprocess.PIPE, logoutput=True)
+        Logger.info(format("LLAP start command: {cmd}"))
+        code, output, error = shell.checked_call(cmd, user=params.hive_user, quiet = True, stderr=subprocess.PIPE, logoutput=True)
 
         if code != 0 or output is None:
           raise Fail("Command failed with either non-zero return code or no output.")
@@ -441,7 +440,13 @@ class HiveServerInteractiveDefault(HiveServerInteractive):
       code, output, error = shell.checked_call(llap_status_cmd, user=status_params.hive_user, stderr=subprocess.PIPE,
                                                logoutput=False)
       Logger.info("Received 'llapstatus' command 'output' : {0}".format(output))
-      return self._make_valid_json(output)
+      if code == 0:
+        return self._make_valid_json(output)
+      else:
+        Logger.info("'LLAP status command' output : ", output)
+        Logger.info("'LLAP status command' error : ", error)
+        Logger.info("'LLAP status command' exit code : ", code)
+        raise Fail("Error getting LLAP app status. ")
 
     """
     Get llap app status data for LLAP GA code base.
@@ -461,10 +466,20 @@ class HiveServerInteractiveDefault(HiveServerInteractive):
       #
       #            example : llapstatus -w -r 0.8 -i 2 -t 150
       llap_status_cmd = format("{stack_root}/current/hive-server2-hive2/bin/hive --service llapstatus -w -r {percent_desired_instances_to_be_up} -i {refresh_rate} -t {total_timeout}")
-      code, output, error = shell.checked_call(llap_status_cmd, user=status_params.hive_user, stderr=subprocess.PIPE,
-                                               logoutput=False)
-      Logger.info("Received 'llapstatus' command 'output' : {0}".format(output))
-      return self._make_valid_json(output)
+      Logger.info("\n\n\n\n\n");
+      Logger.info("LLAP status command : {0}".format(llap_status_cmd))
+      code, output, error = shell.checked_call(llap_status_cmd, user=status_params.hive_user, quiet=True, stderr=subprocess.PIPE,
+                                               logoutput=True)
+
+      if code == 0:
+        return self._make_valid_json(output)
+      else:
+        Logger.info("'LLAP status command' output : ", output)
+        Logger.info("'LLAP status command' error : ", error)
+        Logger.info("'LLAP status command' exit code : ", code)
+        raise Fail("Error getting LLAP app status. ")
+
+
 
 
     """
@@ -526,7 +541,6 @@ class HiveServerInteractiveDefault(HiveServerInteractive):
               marker_idx = idx
               break;
 
-      Logger.info("Marker index for start of JSON data for 'llapsrtatus' comamnd : {0}".format(marker_idx))
 
       # Remove extra logging from possible JSON output
       if marker_idx is None:
@@ -534,7 +548,6 @@ class HiveServerInteractiveDefault(HiveServerInteractive):
       else:
         if marker_idx != 0:
           del splits[0:marker_idx]
-          Logger.info("Removed lines: '1-{0}' from the received 'llapstatus' output to make it valid for JSON parsing.".format(marker_idx))
 
       scanned_output = '\n'.join(splits)
       llap_app_info = json.loads(scanned_output)
@@ -554,7 +567,6 @@ class HiveServerInteractiveDefault(HiveServerInteractive):
                 num_retries :   Number of retries to check the LLAP app status.
     """
     def check_llap_app_status_in_llap_tp(self, llap_app_name, num_retries, return_immediately_if_stopped=False):
-      Logger.info("Entered check_llap_app_status_in_hdp_tp")
       curr_time = time.time()
       num_retries = int(num_retries)
       if num_retries <= 0:
@@ -579,10 +591,9 @@ class HiveServerInteractiveDefault(HiveServerInteractive):
         return False
 
     def check_llap_app_status_in_llap_ga(self, llap_app_name, num_retries, return_immediately_if_stopped=False):
-      Logger.info("Entered check_llap_app_status_in_llap_ga()")
       curr_time = time.time()
       total_timeout = int(num_retries) * 20; # Total wait time while checking the status via llapstatus command
-      Logger.info("Calculated 'total_timeout' : {0} using config 'num_retries_for_checking_llap_status' : {1}".format(total_timeout, num_retries))
+      Logger.debug("Calculated 'total_timeout' : {0} using config 'num_retries_for_checking_llap_status' : {1}".format(total_timeout, num_retries))
       refresh_rate = 2 # Frequency of checking the llapstatus
       percent_desired_instances_to_be_up = 80 # Out of 100.
       llap_app_info = self._get_llap_app_status_info_in_llap_ga(percent_desired_instances_to_be_up/100.0, total_timeout, refresh_rate)

http://git-wip-us.apache.org/repos/asf/ambari/blob/5b53d848/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/llap-cli-log4j2.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/llap-cli-log4j2.xml b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/llap-cli-log4j2.xml
index a40c200..0b3eddb 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/llap-cli-log4j2.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/llap-cli-log4j2.xml
@@ -67,13 +67,14 @@ name = LlapCliLog4j2
 packages = org.apache.hadoop.hive.ql.log
 
 # list of properties
-property.hive.log.level = {{hive_log_level}}
+property.hive.log.level = WARN
 property.hive.root.logger = console
 property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name}
 property.hive.log.file = llap-cli.log
+property.hive.llapstatus.consolelogger.level = INFO
 
 # list of all appenders
-appenders = console, DRFA
+appenders = console, DRFA, llapstatusconsole
 
 # console appender
 appender.console.type = Console
@@ -82,6 +83,13 @@ appender.console.target = SYSTEM_ERR
 appender.console.layout.type = PatternLayout
 appender.console.layout.pattern = %p %c{2}: %m%n
 
+# llapstatusconsole appender
+appender.llapstatusconsole.type = Console
+appender.llapstatusconsole.name = llapstatusconsole
+appender.llapstatusconsole.target = SYSTEM_OUT
+appender.llapstatusconsole.layout.type = PatternLayout
+appender.llapstatusconsole.layout.pattern = %m%n
+
 # daily rolling file appender
 appender.DRFA.type = RollingRandomAccessFile
 appender.DRFA.name = DRFA
@@ -100,7 +108,7 @@ appender.DRFA.policies.fsize.type = SizeBasedTriggeringPolicy
 appender.DRFA.policies.fsize.size = {{llap_cli_log_maxfilesize}}MB
 
 # list of all loggers
-loggers = ZooKeeper, DataNucleus, Datastore, JPOX, HadoopConf
+loggers = ZooKeeper, DataNucleus, Datastore, JPOX, HadoopConf, LlapStatusServiceDriverConsole
 
 logger.ZooKeeper.name = org.apache.zookeeper
 logger.ZooKeeper.level = WARN
@@ -117,11 +125,19 @@ logger.JPOX.level = ERROR
 logger.HadoopConf.name = org.apache.hadoop.conf.Configuration
 logger.HadoopConf.level = ERROR
 
+logger.LlapStatusServiceDriverConsole.name = LlapStatusServiceDriverConsole
+logger.LlapStatusServiceDriverConsole.additivity = false
+logger.LlapStatusServiceDriverConsole.level = ${sys:hive.llapstatus.consolelogger.level}
+
+
 # root logger
 rootLogger.level = ${sys:hive.log.level}
 rootLogger.appenderRefs = root, DRFA
 rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
 rootLogger.appenderRef.DRFA.ref = DRFA
+logger.LlapStatusServiceDriverConsole.appenderRefs = llapstatusconsole, DRFA
+logger.LlapStatusServiceDriverConsole.appenderRef.llapstatusconsole.ref = llapstatusconsole
+logger.LlapStatusServiceDriverConsole.appenderRef.DRFA.ref = DRFA
   </value>
     <value-attributes>
       <type>content</type>