You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by av...@apache.org on 2018/07/09 20:42:29 UTC
[ambari] branch trunk updated: AMBARI-24270. Agent Status Command Are Randomly Failing With Empty stderr (aonishuk) (#1727)

This is an automated email from the ASF dual-hosted git repository.

avijayan pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/ambari.git


The following commit(s) were added to refs/heads/trunk by this push:
     new eca3633  AMBARI-24270. Agent Status Command Are Randomly Failing With Empty stderr (aonishuk) (#1727)
eca3633 is described below

commit eca3633d0ef1d7a1f464d14d3729a3ae11abe420
Author: aonishuk <ao...@hortonworks.com>
AuthorDate: Mon Jul 9 23:42:26 2018 +0300

    AMBARI-24270. Agent Status Command Are Randomly Failing With Empty stderr (aonishuk) (#1727)
---
 .../main/python/ambari_agent/ComponentStatusExecutor.py    |  2 +-
 .../main/python/ambari_agent/CustomServiceOrchestrator.py  | 14 ++++++++++++++
 .../src/main/python/ambari_agent/PythonExecutor.py         | 11 +++++++++--
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py b/ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py
index 5d20495..65af9b0 100644
--- a/ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py
+++ b/ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py
@@ -136,7 +136,7 @@ class ComponentStatusExecutor(threading.Thread):
     if status == LiveStatus.DEAD_STATUS:
       stderr = component_status_result['stderr']
       if not "ComponentIsNotRunning" in stderr and not "ClientComponentHasNoStatus" in stderr:
-        logger.info("Status command for {0} failed:\n{1}".format(component_name, stderr))
+        logger.info("Status command for {0} failed ({1}) :\n{2}".format(component_name, component_status_result, stderr))
 
     result = {
       'serviceName': service_name,
diff --git a/ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py b/ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py
index 51a0d59..bcb3e9b 100644
--- a/ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py
+++ b/ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py
@@ -326,6 +326,7 @@ class CustomServiceOrchestrator():
 
     try:
       command = self.generate_command(command_header)
+      logger.info("Generated command")
       script_type = command['commandParams']['script_type']
       script = command['commandParams']['script']
       timeout = int(command['commandParams']['command_timeout'])
@@ -339,6 +340,8 @@ class CustomServiceOrchestrator():
         command_name = command['roleCommand']
       else:
         task_id = 'status'
+        
+      logger.info("[{0}]".format(task_id))
 
       if forced_command_name is not None:  # If not supplied as an argument
         command_name = forced_command_name
@@ -422,11 +425,18 @@ class CustomServiceOrchestrator():
         if log_out_files:
           script_params.append("-o")
 
+        logger.info("[{0}] python_executor.run_file({1})".format(task_id, (py_file, script_params,
+                               tmpoutfile, tmperrfile, timeout,
+                               tmpstrucoutfile, self.map_task_to_process,
+                               task_id, override_output_files, backup_log_files,
+                               handle, log_info_on_failure)))
+                               
         ret = python_executor.run_file(py_file, script_params,
                                tmpoutfile, tmperrfile, timeout,
                                tmpstrucoutfile, self.map_task_to_process,
                                task_id, override_output_files, backup_log_files = backup_log_files,
                                handle = handle, log_info_on_failure=log_info_on_failure)
+        logger.info("[{0}] python_executor.run_file() returned {1}", task_id, ret)
         # Next run_file() invocations should always append to current output
         override_output_files = False
         if ret['exitcode'] != 0:
@@ -437,6 +447,7 @@ class CustomServiceOrchestrator():
 
       # if canceled and not background command
       if handle is None:
+        logger.info("[{0}] canceled", task_id)
         cancel_reason = self.command_canceled_reason(task_id)
         if cancel_reason is not None:
           ret['stdout'] += cancel_reason
@@ -512,6 +523,7 @@ class CustomServiceOrchestrator():
      Exit code 0 means that component is running and any other exit code means that
      component is not running
     """
+    logger.info("Requesting component status {0}".format(command_header))
     override_output_files=True # by default, we override status command output
     if logger.level == logging.DEBUG:
       override_output_files = False
@@ -519,6 +531,8 @@ class CustomServiceOrchestrator():
     res = self.runCommand(command_header, self.status_commands_stdout,
                           self.status_commands_stderr, self.COMMAND_NAME_STATUS,
                           override_output_files=override_output_files, is_status_command=True)
+                          
+    logger.info("requestComponentStatus return={0}".format(res))
     return res
 
   def resolve_script_path(self, base_dir, script):
diff --git a/ambari-agent/src/main/python/ambari_agent/PythonExecutor.py b/ambari-agent/src/main/python/ambari_agent/PythonExecutor.py
index 8b9e9ca..bfbbc17 100644
--- a/ambari-agent/src/main/python/ambari_agent/PythonExecutor.py
+++ b/ambari-agent/src/main/python/ambari_agent/PythonExecutor.py
@@ -44,6 +44,7 @@ class PythonExecutor(object):
   used as a singleton for a concurrent execution of python scripts
   """
   NO_ERROR = "none"
+  i = 0
 
   def __init__(self, tmpDir, config):
     self.grep = Grep()
@@ -93,14 +94,16 @@ class PythonExecutor(object):
     recreated or appended.
     The structured out file, however, is preserved during multiple invocations that use the same file.
     """
+    PythonExecutor.i += 1
+    
     pythonCommand = self.python_command(script, script_params)
-    if logger.isEnabledFor(logging.DEBUG):
-      logger.debug("Running command %s", pprint.pformat(pythonCommand))
+    logger.info("[%d] Running command %s", PythonExecutor.i, pprint.pformat(pythonCommand))
 
     if handle is None:
       tmpout, tmperr = self.open_subprocess32_files(tmpoutfile, tmperrfile, override_output_files, backup_log_files)
 
       process = self.launch_python_subprocess32(pythonCommand, tmpout, tmperr)
+      logger.info("[%d] Started process with pid = %s", PythonExecutor.i, process.pid)
       # map task_id to pid
       callback(task_id, process.pid)
       logger.debug("Launching watchdog thread")
@@ -110,6 +113,7 @@ class PythonExecutor(object):
       thread.start()
       # Waiting for the process to be either finished or killed
       process.communicate()
+      logger.info("[{0}] Process communicated {1}", PythonExecutor.i, (process.returncode, tmpoutfile, tmperrfile, tmpstructedoutfile, timeout))
       self.event.set()
       thread.join()
       result = self.prepare_process_result(process.returncode, tmpoutfile, tmperrfile, tmpstructedoutfile, timeout=timeout)
@@ -117,8 +121,10 @@ class PythonExecutor(object):
       if log_info_on_failure and result['exitcode']:
         self.on_failure(pythonCommand, result)
 
+      logger.info("[{0}] PE.run_file() returned {1}", PythonExecutor.i, result)
       return result
     else:
+      logger.info("[{0}] has handle", PythonExecutor.i)
       holder = Holder(pythonCommand, tmpoutfile, tmperrfile, tmpstructedoutfile, handle)
 
       background = BackgroundThread(holder, self)
@@ -176,6 +182,7 @@ class PythonExecutor(object):
       for k, v in command_env.iteritems():
         command_env[k] = str(v)
 
+    logger.info("[{0}] launch_python_subprocess32 {1}".format(PythonExecutor.i, (command, tmpout, tmperr, close_fds, self.preexec_fn)))
     return subprocess32.Popen(command,
       stdout=tmpout,
       stderr=tmperr, close_fds=close_fds, env=command_env, preexec_fn=self.preexec_fn)