You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2024/01/29 18:14:34 UTC

(impala) branch master updated: IMPALA-12125: Support for dumping symbols from RPMs without separate symbols

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 3285cfd69 IMPALA-12125: Support for dumping symbols from RPMs without separate symbols
3285cfd69 is described below

commit 3285cfd69055f485e9a835965d3e4d3695a61b6c
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Mon Jan 22 23:21:40 2024 -0800

    IMPALA-12125: Support for dumping symbols from RPMs without separate symbols
    
    Some RPMs ship binaries that already contain debug symbols, so no
    separate debuginfo package is needed. bin/dump_breakpad_symbols.py
    does not allow this combination, because it expects a corresponding
    symbol package. This adds a --no_symbol_pkg option to
    dump_breakpad_symbols.py that turns off the requirement that --pkg
    be combined with --symbol_pkg.
    
    Testing:
     - Tested with an RPM package with an unstripped impalad binary
     - Tested with the usual RPM + debuginfo RPM combination
    
    Change-Id: I9589b0ed7855fe49c6989ec3dcc51a9e9c4f476b
    Reviewed-on: http://gerrit.cloudera.org:8080/20944
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Yida Wu <wy...@gmail.com>
---
 bin/dump_breakpad_symbols.py | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/bin/dump_breakpad_symbols.py b/bin/dump_breakpad_symbols.py
index da8485bd4..e28422e2e 100755
--- a/bin/dump_breakpad_symbols.py
+++ b/bin/dump_breakpad_symbols.py
@@ -137,6 +137,8 @@ def parse_args():
       to process, use with -s""")
   parser.add_argument('-s', '--symbol_pkg', '--debuginfo_rpm', help="""RPM/DEB file
       containing the debug symbols matching the binaries in -r""")
+  parser.add_argument('--no_symbol_pkg', '--no_debuginfo_rpm', action='store_true',
+      help="""Don't require a symbol pkg when processing a RPM/DEB package with -r""")
   parser.add_argument('--objcopy', help='Path to the objcopy binary from Binutils')
   parser.add_argument('--num_processes', type=int, default=multiprocessing.cpu_count(),
       help="Number of parallel processes to use.")
@@ -144,9 +146,9 @@ def parse_args():
 
   # Post processing checks
   # Check that either both pkg and debuginfo_rpm/deb are specified, or none.
-  if bool(args.pkg) != bool(args.symbol_pkg):
+  if not args.no_symbol_pkg and bool(args.pkg) != bool(args.symbol_pkg):
     parser.print_usage()
-    die('Either both -r and -s have to be specified, or none')
+    die("The -r option requires a corresponding -s unless --no_symbol_pkg is specified")
   input_flags = [args.build_dir, args.binary_files, args.stdin_files, args.pkg]
   if sum(1 for flag in input_flags if flag) != 1:
     die('You need to specify exactly one way to locate input files (-b/-f/-i/-r,-s)')
@@ -218,30 +220,37 @@ def enumerate_pkg_files(pkg, symbol_pkg):
   """Return a generator over BinarySymbolInfo tuples for all ELF files in 'pkg'.
 
   This function extracts both RPM/DEB files, then walks the binary pkg directory to
-  enumerate all ELF files, matches them to the location of their respective .debug files
-  and yields all tuples thereof. We use a generator here to keep the temporary directory
-  and its contents around until the consumer of the generator has finished its processing.
+  enumerate all ELF files. If there is no separate symbol pkg, it simply yields
+  all ELF files. If there is a separate symbol pkg, it matches the binaries
+  to the location of their respective .debug files and yields the matching tuples.
+  We use a generator here to keep the temporary directory and its contents around
+  until the consumer of the generator has finished its processing.
   """
   IMPALA_BINARY_BASE = os.path.join('usr', 'lib', 'impala')
   IMPALA_SYMBOL_BASE = os.path.join('usr', 'lib', 'debug', IMPALA_BINARY_BASE)
   assert_file_exists(pkg)
-  assert_file_exists(symbol_pkg)
+  if symbol_pkg:
+    assert_file_exists(symbol_pkg)
   tmp_dir = tempfile.mkdtemp()
   try:
     # Extract pkg
     logging.info('Extracting to %s: %s' % (tmp_dir, pkg))
     extract_pkg(os.path.abspath(pkg), tmp_dir)
-    # Extract symbol_pkg
-    logging.info('Extracting to %s: %s' % (tmp_dir, symbol_pkg))
-    extract_pkg(os.path.abspath(symbol_pkg), tmp_dir)
-    # Walk pkg path and find elf files
     binary_base = os.path.join(tmp_dir, IMPALA_BINARY_BASE)
-    symbol_base = os.path.join(tmp_dir, IMPALA_SYMBOL_BASE)
+    if symbol_pkg:
+      # Extract symbol_pkg
+      logging.info('Extracting to %s: %s' % (tmp_dir, symbol_pkg))
+      extract_pkg(os.path.abspath(symbol_pkg), tmp_dir)
+      symbol_base = os.path.join(tmp_dir, IMPALA_SYMBOL_BASE)
+    # Walk pkg path and find elf files
     # Find folder with .debug file in symbol_pkg path
     for binary_path in find_elf_files(binary_base):
       # Add tuple to output
-      rel_dir = os.path.relpath(os.path.dirname(binary_path), binary_base)
-      debug_dir = os.path.join(symbol_base, rel_dir)
+      if symbol_pkg:
+        rel_dir = os.path.relpath(os.path.dirname(binary_path), binary_base)
+        debug_dir = os.path.join(symbol_base, rel_dir)
+      else:
+        debug_dir = None
       yield BinarySymbolInfo(binary_path, debug_dir)
   finally:
     shutil.rmtree(tmp_dir)