You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by nc...@apache.org on 2016/10/27 15:20:31 UTC
[03/16] ambari git commit: AMBARI-18704. Add code to improve debugging of ambari-agent related problems. (aonishuk)
AMBARI-18704. Add code to improve debugging of ambari-agent related problems. (aonishuk)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/aa588ca8
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/aa588ca8
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/aa588ca8
Branch: refs/heads/branch-feature-AMBARI-18634
Commit: aa588ca8667f5f67b6be2251a6dad37230172fb4
Parents: 7b30be6
Author: Andrew Onishuk <ao...@hortonworks.com>
Authored: Wed Oct 26 20:08:46 2016 +0300
Committer: Andrew Onishuk <ao...@hortonworks.com>
Committed: Wed Oct 26 20:08:46 2016 +0300
----------------------------------------------------------------------
.../python/ambari_agent/HeartbeatHandlers.py | 13 ++++--------
.../python/ambari_agent/RemoteDebugUtils.py | 21 +++++++++++++++++++-
.../ambari_agent/StatusCommandsExecutor.py | 12 ++++++++---
3 files changed, 33 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/aa588ca8/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
index 4a3d372..836ab07 100644
--- a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
+++ b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
@@ -26,9 +26,10 @@ import signal
import threading
import traceback
from ambari_commons.os_family_impl import OsFamilyImpl
-from RemoteDebugUtils import remote_debug
import sys
+from ambari_agent.RemoteDebugUtils import bind_debug_signal_handlers
+
logger = logging.getLogger()
_handler = None
@@ -128,14 +129,8 @@ def bind_signal_handlers(agentPid):
if os.getpid() == agentPid:
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
- signal.signal(signal.SIGUSR2, remote_debug) # Interrupt running process, and provide a python prompt for it
- try:
- import faulthandler # This is not default module, has to be installed separately
- faulthandler.enable(file=sys.stderr, all_threads=True)
- faulthandler.register(signal.SIGUSR1, file=sys.stderr, all_threads=True, chain=False)
- sys.stderr.write("Registered faulthandler\n")
- except ImportError:
- pass # Module is not included into python distribution
+
+ bind_debug_signal_handlers()
_handler = HeartbeatStopHandlersLinux()
else:
http://git-wip-us.apache.org/repos/asf/ambari/blob/aa588ca8/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py b/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
index f2a462b..ae997ac 100644
--- a/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
+++ b/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
@@ -21,7 +21,26 @@ limitations under the License.
try: import readline # For readline input support
except: pass
-import sys, os, traceback, codeop, cStringIO, cPickle, tempfile
+import sys, signal, os, traceback, codeop, cStringIO, cPickle, tempfile
+
+def bind_debug_signal_handlers():
+ signal.signal(signal.SIGUSR1, print_threads_stack_traces) # prints process threads current stack trace to the err stream. (can be found in ambari-agent.out)
+ signal.signal(signal.SIGUSR2, remote_debug) # provide a read-only python shell, which represent the process state at time of signal arrival.
+
+def print_threads_stack_traces(sig, frame):
+ print >> sys.stderr, "\n*** STACKTRACE - START ***\n"
+ code = []
+ for threadId, stack in sys._current_frames().items():
+ code.append("\n# ThreadID: %s" % threadId)
+ for filename, lineno, name, line in traceback.extract_stack(stack):
+ code.append('File: "%s", line %d, in %s' % (filename,
+ lineno, name))
+ if line:
+ code.append(" %s" % (line.strip()))
+
+ for line in code:
+ print >> sys.stderr, line
+ print >> sys.stderr, "\n*** STACKTRACE - END ***\n"
def pipename(pid):
"""Return name of pipe to use"""
http://git-wip-us.apache.org/repos/asf/ambari/blob/aa588ca8/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py b/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
index 8959640..20acee4 100644
--- a/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
+++ b/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
@@ -22,7 +22,8 @@ import signal
import threading
import logging
import multiprocessing
-from PythonReflectiveExecutor import PythonReflectiveExecutor
+from ambari_agent.PythonReflectiveExecutor import PythonReflectiveExecutor
+from ambari_agent.RemoteDebugUtils import bind_debug_signal_handlers
logger = logging.getLogger(__name__)
@@ -43,8 +44,10 @@ class StatusCommandsExecutor(multiprocessing.Process):
def run(self):
try:
+ bind_debug_signal_handlers()
while True:
command = self.actionQueue.statusCommandQueue.get(True) # blocks until status status command appears
+ logger.info("Running status command for {0}".format(command['componentName'])) # TODO: change to logger.debug once fixed
timeout_timer = threading.Timer( self.status_command_timeout, self.respawn, [command])
timeout_timer.start()
@@ -52,6 +55,7 @@ class StatusCommandsExecutor(multiprocessing.Process):
self.process_status_command(command)
timeout_timer.cancel()
+ logger.info("Completed status command for {0}".format(command['componentName'])) # TODO: change to logger.debug once fixed
except:
logger.exception("StatusCommandsExecutor process failed with exception:")
raise
@@ -67,8 +71,10 @@ class StatusCommandsExecutor(multiprocessing.Process):
def respawn(self, command):
try:
- # Force context to reset to normal. By context we mean sys.path, imports, etc. They are set by specific status command, and are not relevant to ambari-agent.
- PythonReflectiveExecutor.last_context.revert()
+ if hasattr(PythonReflectiveExecutor, "last_context"):
+ # Force context to reset to normal. By context we mean sys.path, imports, etc. They are set by specific status command, and are not relevant to ambari-agent.
+ PythonReflectiveExecutor.last_context.revert()
+
logger.warn("Command {0} for {1} is running for more than {2} seconds. Terminating it due to timeout.".format(command['commandType'], command['componentName'], self.status_command_timeout))
self.hasTimeoutedEvent.set()