You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ao...@apache.org on 2016/10/26 17:09:06 UTC

[2/2] ambari git commit: AMBARI-18704. Add code to improve debugging of ambari-agent related problems. (aonishuk)

AMBARI-18704. Add code to improve debugging of ambari-agent related problems. (aonishuk)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/4b3d2848
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/4b3d2848
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/4b3d2848

Branch: refs/heads/branch-2.5
Commit: 4b3d2848d5c589e7054c19d57c2d07057a9682af
Parents: 1e72a09
Author: Andrew Onishuk <ao...@hortonworks.com>
Authored: Wed Oct 26 20:08:49 2016 +0300
Committer: Andrew Onishuk <ao...@hortonworks.com>
Committed: Wed Oct 26 20:08:49 2016 +0300

----------------------------------------------------------------------
 .../python/ambari_agent/HeartbeatHandlers.py    | 13 ++++--------
 .../python/ambari_agent/RemoteDebugUtils.py     | 21 +++++++++++++++++++-
 .../ambari_agent/StatusCommandsExecutor.py      | 12 ++++++++---
 3 files changed, 33 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/4b3d2848/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
index 4a3d372..836ab07 100644
--- a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
+++ b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
@@ -26,9 +26,10 @@ import signal
 import threading
 import traceback
 from ambari_commons.os_family_impl import OsFamilyImpl
-from RemoteDebugUtils import remote_debug
 import sys
 
+from ambari_agent.RemoteDebugUtils import bind_debug_signal_handlers
+
 logger = logging.getLogger()
 
 _handler = None
@@ -128,14 +129,8 @@ def bind_signal_handlers(agentPid):
     if os.getpid() == agentPid:
       signal.signal(signal.SIGINT, signal_handler)
       signal.signal(signal.SIGTERM, signal_handler)
-      signal.signal(signal.SIGUSR2, remote_debug) # Interrupt running process, and provide a python prompt for it
-      try:
-        import faulthandler  # This is not default module, has to be installed separately
-        faulthandler.enable(file=sys.stderr, all_threads=True)
-        faulthandler.register(signal.SIGUSR1, file=sys.stderr, all_threads=True, chain=False)
-        sys.stderr.write("Registered faulthandler\n")
-      except ImportError:
-        pass  # Module is not included into python distribution
+
+      bind_debug_signal_handlers()
 
     _handler = HeartbeatStopHandlersLinux()
   else:

http://git-wip-us.apache.org/repos/asf/ambari/blob/4b3d2848/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py b/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
index f2a462b..ae997ac 100644
--- a/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
+++ b/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
@@ -21,7 +21,26 @@ limitations under the License.
 try: import readline  # For readline input support
 except: pass
 
-import sys, os, traceback, codeop, cStringIO, cPickle, tempfile
+import sys, signal, os, traceback, codeop, cStringIO, cPickle, tempfile
+
+def bind_debug_signal_handlers():
+  signal.signal(signal.SIGUSR1, print_threads_stack_traces) # prints the current stack trace of every thread in the process to stderr (can be found in ambari-agent.out)
+  signal.signal(signal.SIGUSR2, remote_debug) # provides a read-only python shell, which represents the process state at the time of signal arrival.
+
+def print_threads_stack_traces(sig, frame):
+  print >> sys.stderr, "\n*** STACKTRACE - START ***\n"
+  code = []
+  for threadId, stack in sys._current_frames().items():
+    code.append("\n# ThreadID: %s" % threadId)
+    for filename, lineno, name, line in traceback.extract_stack(stack):
+      code.append('File: "%s", line %d, in %s' % (filename,
+                                                  lineno, name))
+      if line:
+        code.append("  %s" % (line.strip()))
+
+  for line in code:
+    print >> sys.stderr, line
+  print >> sys.stderr, "\n*** STACKTRACE - END ***\n"
 
 def pipename(pid):
   """Return name of pipe to use"""

http://git-wip-us.apache.org/repos/asf/ambari/blob/4b3d2848/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py b/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
index 8959640..20acee4 100644
--- a/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
+++ b/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
@@ -22,7 +22,8 @@ import signal
 import threading
 import logging
 import multiprocessing
-from PythonReflectiveExecutor import PythonReflectiveExecutor
+from ambari_agent.PythonReflectiveExecutor import PythonReflectiveExecutor
+from ambari_agent.RemoteDebugUtils import bind_debug_signal_handlers
 
 logger = logging.getLogger(__name__)
 
@@ -43,8 +44,10 @@ class StatusCommandsExecutor(multiprocessing.Process):
 
   def run(self):
     try:
+      bind_debug_signal_handlers()
       while True:
         command = self.actionQueue.statusCommandQueue.get(True) # blocks until status status command appears
+        logger.info("Running status command for {0}".format(command['componentName'])) # TODO: change to logger.debug once fixed
         
         timeout_timer = threading.Timer( self.status_command_timeout, self.respawn, [command])
         timeout_timer.start()
@@ -52,6 +55,7 @@ class StatusCommandsExecutor(multiprocessing.Process):
         self.process_status_command(command)
 
         timeout_timer.cancel()
+        logger.info("Completed status command for {0}".format(command['componentName']))  # TODO: change to logger.debug once fixed
     except:
       logger.exception("StatusCommandsExecutor process failed with exception:")
       raise
@@ -67,8 +71,10 @@ class StatusCommandsExecutor(multiprocessing.Process):
 
   def respawn(self, command):
     try:
-      # Force context to reset to normal. By context we mean sys.path, imports, etc. They are set by specific status command, and are not relevant to ambari-agent.
-      PythonReflectiveExecutor.last_context.revert()
+      if hasattr(PythonReflectiveExecutor, "last_context"):
+        # Force context to reset to normal. By context we mean sys.path, imports, etc. They are set by specific status command, and are not relevant to ambari-agent.
+        PythonReflectiveExecutor.last_context.revert()
+
       logger.warn("Command {0} for {1} is running for more than {2} seconds. Terminating it due to timeout.".format(command['commandType'], command['componentName'], self.status_command_timeout))
 
       self.hasTimeoutedEvent.set()