You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2024/01/08 19:06:37 UTC
(impala) 02/04: IMPALA-12643 (part 2): Fallback to safe libraries on error in resolve_minidumps.py
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit dac7f409ba0619180835ea908349ac5108a58c3a
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sun Dec 17 13:59:35 2023 -0800
IMPALA-12643 (part 2): Fallback to safe libraries on error in resolve_minidumps.py
Since resolve_minidumps.py's call to minidump_stackwalk can go haywire
due to bad symbols in shared libraries, this adds a fallback mechanism
where it tries again with a "safe" list of shared libraries. These are
limited to the ones that make the most difference in resolving minidumps
(libc, libstdc++, and libjvm). The list of safe libraries can be
customized via the --safe_library_list.
Testing:
- Verified that this uses the fallback on Centos 7 and resolves
the minidumps successfully.
Change-Id: I6bb4c9f65f9c27bb3b86c7ff2f3a6a48e258ef01
Reviewed-on: http://gerrit.cloudera.org:8080/20863
Reviewed-by: Michael Smith <mi...@cloudera.com>
Tested-by: Joe McDonnell <jo...@cloudera.com>
---
bin/resolve_minidumps.py | 96 +++++++++++++++++++++++++++++++++++++-----------
1 file changed, 74 insertions(+), 22 deletions(-)
diff --git a/bin/resolve_minidumps.py b/bin/resolve_minidumps.py
index f410fabcf..3c64d9d55 100755
--- a/bin/resolve_minidumps.py
+++ b/bin/resolve_minidumps.py
@@ -43,6 +43,7 @@ import shutil
import subprocess
import sys
import tempfile
+import traceback
from argparse import ArgumentParser
@@ -132,6 +133,26 @@ def read_module_info(minidump_dump_contents):
return modules
+def filter_shared_library_modules(module_list, lib_allow_list):
+ """Filter the list of modules by eliminating any shared libaries that do not match
+ one of the prefixes in the allow list. This keeps all non-shared libaries
+ (such as the main binary).
+ """
+ filtered_module_list = []
+ for module in module_list:
+ code_file_basename = os.path.basename(module.code_file)
+ # Keep anything that is not a shared library (e.g. the main binary)
+ if ".so" not in code_file_basename:
+ filtered_module_list.append(module)
+ continue
+ # Only keep shared libraries that match an entry on the allow list.
+ for allow_lib in lib_allow_list:
+ if code_file_basename.startswith(allow_lib):
+ filtered_module_list.append(module)
+ break
+ return filtered_module_list
+
+
def find_breakpad_home():
"""Locate the Breakpad home directory.
@@ -331,10 +352,39 @@ def parse_args():
parser.add_argument('--minidump_file', required=True)
parser.add_argument('--output_file', required=True)
parser.add_argument('-v', '--verbose', action='store_true')
+ parser.add_argument('--safe_library_list',
+ default="libstdc++.so,libc.so,libjvm.so",
+ help="Comma-separate list of prefixes for allowed system libraries")
args = parser.parse_args()
return args
+def dump_syms_and_resolve_stack(modules, minidump_file, output_file, verbose):
+ """Dump the symbols for the listed modules and use them to resolve the minidump."""
+ # Create a temporary directory to store the symbols
+ # This automatically gets cleaned up
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ # Dump symbols for all the modules into this temporary directory.
+ # Need both dump_syms and objcopy
+ dump_syms_bin = find_breakpad_binary("dump_syms")
+ if not dump_syms_bin:
+ logging.error("Could not find Breakpad dump_syms binary")
+ sys.exit(1)
+ objcopy_bin = find_objcopy_binary()
+ if not objcopy_bin:
+ logging.error("Could not find Binutils objcopy binary")
+ sys.exit(1)
+ dump_symbols_for_all_modules(dump_syms_bin, objcopy_bin, modules, tmp_dir)
+
+ # Resolve the minidump with the temporary symbol directory
+ minidump_stackwalk_bin = find_breakpad_binary("minidump_stackwalk")
+ if not minidump_stackwalk_bin:
+ logging.error("Could not find Breakpad minidump_stackwalk binary")
+ sys.exit(1)
+ resolve_minidump(find_breakpad_binary("minidump_stackwalk"), minidump_file,
+ tmp_dir, verbose, output_file)
+
+
def main():
args = parse_args()
@@ -361,28 +411,30 @@ def main():
logging.error("Failed to read modules for {0}".format(args.minidump_file))
sys.exit(1)
- # Create a temporary directory to store the symbols.
- # This automatically gets cleaned up.
- with tempfile.TemporaryDirectory() as tmp_dir:
- # Step 3: Dump symbols for all the modules into this temporary directory.
- # Need both dump_syms and objcopy
- dump_syms_bin = find_breakpad_binary("dump_syms")
- if not dump_syms_bin:
- logging.error("Could not find Breakpad dump_syms binary")
- sys.exit(1)
- objcopy_bin = find_objcopy_binary()
- if not objcopy_bin:
- logging.error("Could not find Binutils objcopy binary")
- sys.exit(1)
- dump_symbols_for_all_modules(dump_syms_bin, objcopy_bin, modules, tmp_dir)
-
- # Step 4: Resolve the minidump with the temporary symbol directory
- minidump_stackwalk_bin = find_breakpad_binary("minidump_stackwalk")
- if not minidump_stackwalk_bin:
- logging.error("Could not find Breakpad minidump_stackwalk binary")
- sys.exit(1)
- resolve_minidump(find_breakpad_binary("minidump_stackwalk"), args.minidump_file,
- tmp_dir, args.verbose, args.output_file)
+ # Step 3: Dump the symbols and use them to resolve the minidump
+ # Sometimes there are libraries with corrupt/problematic symbols
+ # that can cause minidump_stackwalk to go haywire and use excessive
+ # memory. First, we try using symbols from all of the shared libraries.
+ # If that fails, we fallback to using a "safe" list of shared libraries.
+ try:
+ # Dump the symbols and use them to resolve the minidump
+ dump_syms_and_resolve_stack(modules, args.minidump_file, args.output_file,
+ args.verbose)
+ return
+ except Exception:
+ logging.warning("Encountered error: {0}".format(traceback.format_exc()))
+ logging.warning("Falling back to resolution using the safe library list")
+ logging.warning("Safe library list: {0}".format(args.safe_library_list))
+
+ # Limit the shared libraries to the "safe" list of shared libraries and
+ # try again.
+ if len(args.safe_library_list) == 0:
+ safe_library_list = []
+ else:
+ safe_library_list = args.safe_library_list.split(",")
+ safe_modules = filter_shared_library_modules(modules, safe_library_list)
+ dump_syms_and_resolve_stack(safe_modules, args.minidump_file, args.output_file,
+ args.verbose)
if __name__ == "__main__":