You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2024/01/08 19:06:37 UTC

(impala) 02/04: IMPALA-12643 (part 2): Fallback to safe libraries on error in resolve_minidumps.py

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit dac7f409ba0619180835ea908349ac5108a58c3a
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sun Dec 17 13:59:35 2023 -0800

    IMPALA-12643 (part 2): Fallback to safe libraries on error in resolve_minidumps.py
    
    Since resolve_minidumps.py's call to minidump_stackwalk can go haywire
    due to bad symbols in shared libraries, this adds a fallback mechanism
    where it tries again with a "safe" list of shared libraries. These are
    limited to the ones that make the most difference in resolving minidumps
    (libc, libstdc++, and libjvm). The list of safe libraries can be
    customized via the --safe_library_list option.
    
    Testing:
     - Verified that this uses the fallback on CentOS 7 and resolves
       the minidumps successfully.
    
    Change-Id: I6bb4c9f65f9c27bb3b86c7ff2f3a6a48e258ef01
    Reviewed-on: http://gerrit.cloudera.org:8080/20863
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Tested-by: Joe McDonnell <jo...@cloudera.com>
---
 bin/resolve_minidumps.py | 96 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 74 insertions(+), 22 deletions(-)

diff --git a/bin/resolve_minidumps.py b/bin/resolve_minidumps.py
index f410fabcf..3c64d9d55 100755
--- a/bin/resolve_minidumps.py
+++ b/bin/resolve_minidumps.py
@@ -43,6 +43,7 @@ import shutil
 import subprocess
 import sys
 import tempfile
+import traceback
 
 from argparse import ArgumentParser
 
@@ -132,6 +133,26 @@ def read_module_info(minidump_dump_contents):
   return modules
 
 
+def filter_shared_library_modules(module_list, lib_allow_list):
+  """Filter the list of modules by eliminating any shared libraries that do not match
+  one of the prefixes in the allow list. This keeps all non-shared libraries
+  (such as the main binary). Empty prefixes are ignored rather than matching all.
+  """
+  # str.startswith() accepts a tuple. Drop empty prefixes (e.g. from a stray comma),
+  # because every string starts with "" and that would defeat the allow list.
+  allowed_prefixes = tuple(lib for lib in lib_allow_list if lib)
+  filtered_module_list = []
+  for module in module_list:
+    code_file_basename = os.path.basename(module.code_file)
+    # Keep anything that is not a shared library (e.g. the main binary)
+    if ".so" not in code_file_basename:
+      filtered_module_list.append(module)
+    # Only keep shared libraries that match an entry on the allow list.
+    elif code_file_basename.startswith(allowed_prefixes):
+      filtered_module_list.append(module)
+  return filtered_module_list
+
+
 def find_breakpad_home():
   """Locate the Breakpad home directory.
 
@@ -331,10 +352,39 @@ def parse_args():
   parser.add_argument('--minidump_file', required=True)
   parser.add_argument('--output_file', required=True)
   parser.add_argument('-v', '--verbose', action='store_true')
+  parser.add_argument('--safe_library_list',
+      default="libstdc++.so,libc.so,libjvm.so",
+      help="Comma-separate list of prefixes for allowed system libraries")
   args = parser.parse_args()
   return args
 
 
+def dump_syms_and_resolve_stack(modules, minidump_file, output_file, verbose):
+  """Dump the symbols for the listed modules and use them to resolve the minidump.
+  Exits with status 1 if dump_syms, objcopy, or minidump_stackwalk cannot be found.
+  """
+  # Symbols go into a temporary directory that is cleaned up automatically.
+  with tempfile.TemporaryDirectory() as tmp_dir:
+    # Dump symbols for all the modules; this needs both dump_syms and objcopy.
+    dump_syms_bin = find_breakpad_binary("dump_syms")
+    if not dump_syms_bin:
+      logging.error("Could not find Breakpad dump_syms binary")
+      sys.exit(1)
+    objcopy_bin = find_objcopy_binary()
+    if not objcopy_bin:
+      logging.error("Could not find Binutils objcopy binary")
+      sys.exit(1)
+    dump_symbols_for_all_modules(dump_syms_bin, objcopy_bin, modules, tmp_dir)
+
+    # Resolve the minidump with the temporary symbol directory
+    minidump_stackwalk_bin = find_breakpad_binary("minidump_stackwalk")
+    if not minidump_stackwalk_bin:
+      logging.error("Could not find Breakpad minidump_stackwalk binary")
+      sys.exit(1)
+    resolve_minidump(minidump_stackwalk_bin, minidump_file, tmp_dir, verbose,
+                     output_file)
+
+
 def main():
   args = parse_args()
 
@@ -361,28 +411,30 @@ def main():
     logging.error("Failed to read modules for {0}".format(args.minidump_file))
     sys.exit(1)
 
-  # Create a temporary directory to store the symbols.
-  # This automatically gets cleaned up.
-  with tempfile.TemporaryDirectory() as tmp_dir:
-    # Step 3: Dump symbols for all the modules into this temporary directory.
-    # Need both dump_syms and objcopy
-    dump_syms_bin = find_breakpad_binary("dump_syms")
-    if not dump_syms_bin:
-      logging.error("Could not find Breakpad dump_syms binary")
-      sys.exit(1)
-    objcopy_bin = find_objcopy_binary()
-    if not objcopy_bin:
-      logging.error("Could not find Binutils objcopy binary")
-      sys.exit(1)
-    dump_symbols_for_all_modules(dump_syms_bin, objcopy_bin, modules, tmp_dir)
-
-    # Step 4: Resolve the minidump with the temporary symbol directory
-    minidump_stackwalk_bin = find_breakpad_binary("minidump_stackwalk")
-    if not minidump_stackwalk_bin:
-      logging.error("Could not find Breakpad minidump_stackwalk binary")
-      sys.exit(1)
-    resolve_minidump(find_breakpad_binary("minidump_stackwalk"), args.minidump_file,
-                     tmp_dir, args.verbose, args.output_file)
+  # Step 3: Dump the symbols and use them to resolve the minidump
+  # Sometimes there are libraries with corrupt/problematic symbols
+  # that can cause minidump_stackwalk to go haywire and use excessive
+  # memory. First, we try using symbols from all of the shared libraries.
+  # If that fails, we fallback to using a "safe" list of shared libraries.
+  try:
+    # Dump the symbols and use them to resolve the minidump
+    dump_syms_and_resolve_stack(modules, args.minidump_file, args.output_file,
+                                args.verbose)
+    return
+  except Exception:
+    logging.warning("Encountered error: {0}".format(traceback.format_exc()))
+    logging.warning("Falling back to resolution using the safe library list")
+    logging.warning("Safe library list: {0}".format(args.safe_library_list))
+
+  # Limit the shared libraries to the "safe" list of shared libraries and
+  # try again.
+  if len(args.safe_library_list) == 0:
+    safe_library_list = []
+  else:
+    safe_library_list = args.safe_library_list.split(",")
+  safe_modules = filter_shared_library_modules(modules, safe_library_list)
+  dump_syms_and_resolve_stack(safe_modules, args.minidump_file, args.output_file,
+                              args.verbose)
 
 
 if __name__ == "__main__":