You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2024/01/29 23:44:13 UTC

(impala) 01/02: IMPALA-12745: Skip parallel symbol dumping with RPM/DEB packages

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 41a3f4d4ca43092d0ef48eeaa765626b720e986c
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Tue Jan 23 10:02:53 2024 -0800

    IMPALA-12745: Skip parallel symbol dumping with RPM/DEB packages
    
    When using bin/dump_breakpad_symbols.py to dump symbols for RPM/DEB
    packages, the script extracts the packages to a temporary directory
    and relies on keeping that directory around until the processing
    is finished. The parallel processing added in IMPALA-11511 breaks
    the logic that keeps the temporary directory around, so the script
    generates errors like:
    
    Found debugging info in /tmp/tmpqfZ9MZ/usr/lib/debug/usr/lib/impala/sbin-retail/impalad.debug
    Failed to open ELF file '/tmp/tmpqfZ9MZ/usr/lib/debug/usr/lib/impala/sbin-retail/impalad.debug': No such file or directory
    Failed to write symbol file.
    
    This turns off parallelism for bin/dump_breakpad_symbols.py when
    processing RPM/DEB packages (i.e. -r/--pkg). This also avoids using
    a ThreadPool when num_processes <= 1.
    
    Testing:
     - Hand-tested with Red Hat 7 RPMs
    
    Change-Id: If2885a9cfb36a4f616b539599e7f744bd23552c3
    Reviewed-on: http://gerrit.cloudera.org:8080/20943
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Joe McDonnell <jo...@cloudera.com>
---
 bin/dump_breakpad_symbols.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/bin/dump_breakpad_symbols.py b/bin/dump_breakpad_symbols.py
index e28422e2e..ce10daa91 100755
--- a/bin/dump_breakpad_symbols.py
+++ b/bin/dump_breakpad_symbols.py
@@ -353,20 +353,30 @@ def main():
   assert objcopy
   status = 0
   ensure_dir_exists(args.dest_dir)
-  # Use a thread pool to go parallel
-  thread_pool = ThreadPool(processes=args.num_processes)
-
-  def processing_fn(binary):
-    return process_binary(dump_syms, objcopy, binary, args.dest_dir)
-
-  for result in thread_pool.imap_unordered(processing_fn, enumerate_binaries(args)):
-    if not result:
-      thread_pool.terminate()
-      status = 1
-      break
-
-  thread_pool.close()
-  thread_pool.join()
+  # The logic for handling DEB/RPM packages does not currently work with
+  # parallelism, so disable parallelism if using the -r/--pkg option.
+  if args.num_processes > 1 and not bool(args.pkg):
+    # Use a thread pool to go parallel
+    thread_pool = ThreadPool(processes=args.num_processes)
+
+    def processing_fn(binary):
+      return process_binary(dump_syms, objcopy, binary, args.dest_dir)
+
+    for result in thread_pool.imap_unordered(processing_fn, enumerate_binaries(args)):
+      if not result:
+        thread_pool.terminate()
+        status = 1
+        break
+
+    thread_pool.close()
+    thread_pool.join()
+  else:
+    # For the serial case, avoid the ThreadPool altogether; a plain loop is
+    # easier to reason about.
+    for binary in enumerate_binaries(args):
+      if not process_binary(dump_syms, objcopy, binary, args.dest_dir):
+        status = 1
+        break
   sys.exit(status)