You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by av...@apache.org on 2018/07/09 20:42:29 UTC
[ambari] branch trunk updated: AMBARI-24270. Agent Status Command
Are Randomly Failing With Empty stderr (aonishuk) (#1727)
This is an automated email from the ASF dual-hosted git repository.
avijayan pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/ambari.git
The following commit(s) were added to refs/heads/trunk by this push:
new eca3633 AMBARI-24270. Agent Status Command Are Randomly Failing With Empty stderr (aonishuk) (#1727)
eca3633 is described below
commit eca3633d0ef1d7a1f464d14d3729a3ae11abe420
Author: aonishuk <ao...@hortonworks.com>
AuthorDate: Mon Jul 9 23:42:26 2018 +0300
AMBARI-24270. Agent Status Command Are Randomly Failing With Empty stderr (aonishuk) (#1727)
---
.../main/python/ambari_agent/ComponentStatusExecutor.py | 2 +-
.../main/python/ambari_agent/CustomServiceOrchestrator.py | 14 ++++++++++++++
.../src/main/python/ambari_agent/PythonExecutor.py | 11 +++++++++--
3 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py b/ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py
index 5d20495..65af9b0 100644
--- a/ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py
+++ b/ambari-agent/src/main/python/ambari_agent/ComponentStatusExecutor.py
@@ -136,7 +136,7 @@ class ComponentStatusExecutor(threading.Thread):
if status == LiveStatus.DEAD_STATUS:
stderr = component_status_result['stderr']
if not "ComponentIsNotRunning" in stderr and not "ClientComponentHasNoStatus" in stderr:
- logger.info("Status command for {0} failed:\n{1}".format(component_name, stderr))
+ logger.info("Status command for {0} failed ({1}) :\n{2}".format(component_name, component_status_result, stderr))
result = {
'serviceName': service_name,
diff --git a/ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py b/ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py
index 51a0d59..bcb3e9b 100644
--- a/ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py
+++ b/ambari-agent/src/main/python/ambari_agent/CustomServiceOrchestrator.py
@@ -326,6 +326,7 @@ class CustomServiceOrchestrator():
try:
command = self.generate_command(command_header)
+ logger.info("Generated command")
script_type = command['commandParams']['script_type']
script = command['commandParams']['script']
timeout = int(command['commandParams']['command_timeout'])
@@ -339,6 +340,8 @@ class CustomServiceOrchestrator():
command_name = command['roleCommand']
else:
task_id = 'status'
+
+ logger.info("[{0}]".format(task_id))
if forced_command_name is not None: # If supplied as an argument, it overrides the command name
command_name = forced_command_name
@@ -422,11 +425,18 @@ class CustomServiceOrchestrator():
if log_out_files:
script_params.append("-o")
+ logger.info("[{0}] python_executor.run_file({1})".format(task_id, (py_file, script_params,
+ tmpoutfile, tmperrfile, timeout,
+ tmpstrucoutfile, self.map_task_to_process,
+ task_id, override_output_files, backup_log_files,
+ handle, log_info_on_failure)))
+
ret = python_executor.run_file(py_file, script_params,
tmpoutfile, tmperrfile, timeout,
tmpstrucoutfile, self.map_task_to_process,
task_id, override_output_files, backup_log_files = backup_log_files,
handle = handle, log_info_on_failure=log_info_on_failure)
+ logger.info("[{0}] python_executor.run_file() returned {1}", task_id, ret)
# Next run_file() invocations should always append to current output
override_output_files = False
if ret['exitcode'] != 0:
@@ -437,6 +447,7 @@ class CustomServiceOrchestrator():
# if canceled and not background command
if handle is None:
+ logger.info("[{0}] canceled", task_id)
cancel_reason = self.command_canceled_reason(task_id)
if cancel_reason is not None:
ret['stdout'] += cancel_reason
@@ -512,6 +523,7 @@ class CustomServiceOrchestrator():
Exit code 0 means that component is running and any other exit code means that
component is not running
"""
+ logger.info("Requesting component status {0}".format(command_header))
override_output_files=True # by default, we override status command output
if logger.level == logging.DEBUG:
override_output_files = False
@@ -519,6 +531,8 @@ class CustomServiceOrchestrator():
res = self.runCommand(command_header, self.status_commands_stdout,
self.status_commands_stderr, self.COMMAND_NAME_STATUS,
override_output_files=override_output_files, is_status_command=True)
+
+ logger.info("requestComponentStatus return={0}".format(res))
return res
def resolve_script_path(self, base_dir, script):
diff --git a/ambari-agent/src/main/python/ambari_agent/PythonExecutor.py b/ambari-agent/src/main/python/ambari_agent/PythonExecutor.py
index 8b9e9ca..bfbbc17 100644
--- a/ambari-agent/src/main/python/ambari_agent/PythonExecutor.py
+++ b/ambari-agent/src/main/python/ambari_agent/PythonExecutor.py
@@ -44,6 +44,7 @@ class PythonExecutor(object):
used as a singleton for a concurrent execution of python scripts
"""
NO_ERROR = "none"
+ i = 0
def __init__(self, tmpDir, config):
self.grep = Grep()
@@ -93,14 +94,16 @@ class PythonExecutor(object):
recreated or appended.
The structured out file, however, is preserved during multiple invocations that use the same file.
"""
+ PythonExecutor.i += 1
+
pythonCommand = self.python_command(script, script_params)
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug("Running command %s", pprint.pformat(pythonCommand))
+ logger.info("[%d] Running command %s", PythonExecutor.i, pprint.pformat(pythonCommand))
if handle is None:
tmpout, tmperr = self.open_subprocess32_files(tmpoutfile, tmperrfile, override_output_files, backup_log_files)
process = self.launch_python_subprocess32(pythonCommand, tmpout, tmperr)
+ logger.info("[%d] Started process with pid = %s", PythonExecutor.i, process.pid)
# map task_id to pid
callback(task_id, process.pid)
logger.debug("Launching watchdog thread")
@@ -110,6 +113,7 @@ class PythonExecutor(object):
thread.start()
# Waiting for the process to be either finished or killed
process.communicate()
+ logger.info("[{0}] Process communicated {1}", PythonExecutor.i, (process.returncode, tmpoutfile, tmperrfile, tmpstructedoutfile, timeout))
self.event.set()
thread.join()
result = self.prepare_process_result(process.returncode, tmpoutfile, tmperrfile, tmpstructedoutfile, timeout=timeout)
@@ -117,8 +121,10 @@ class PythonExecutor(object):
if log_info_on_failure and result['exitcode']:
self.on_failure(pythonCommand, result)
+ logger.info("[{0}] PE.run_file() returned {1}", PythonExecutor.i, result)
return result
else:
+ logger.info("[{0}] has handle", PythonExecutor.i)
holder = Holder(pythonCommand, tmpoutfile, tmperrfile, tmpstructedoutfile, handle)
background = BackgroundThread(holder, self)
@@ -176,6 +182,7 @@ class PythonExecutor(object):
for k, v in command_env.iteritems():
command_env[k] = str(v)
+ logger.info("[{0}] launch_python_subprocess32 {1}".format(PythonExecutor.i, (command, tmpout, tmperr, close_fds, self.preexec_fn)))
return subprocess32.Popen(command,
stdout=tmpout,
stderr=tmperr, close_fds=close_fds, env=command_env, preexec_fn=self.preexec_fn)