You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2024/01/08 19:06:39 UTC

(impala) 04/04: IMPALA-10048: Go parallel for dump_breakpad_symbols.py

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3bcd770dfc51ed5148cf85a96f5644594f953319
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sun Jun 25 21:20:25 2023 -0700

    IMPALA-10048: Go parallel for dump_breakpad_symbols.py
    
    This modifies dump_breakpad_symbols.py to use a ThreadPool
    so that multiple binaries or libraries are processed in
    parallel. This is common for Jenkins jobs that dump symbols
    for all backend tests. The different binaries write their
    output to different directories, so the threads don't
    interfere with each other.
    
    Testing:
     - Ran locally, dumping the symbols for all backend tests
     - Ran a Jenkins job that generates a minidump and triggers
       the minidump symbol processing. The processing ran in
       parallel and completed successfully.
    
    Change-Id: I93427bb07f1d9718bd6df90acfd247210b54294d
    Reviewed-on: http://gerrit.cloudera.org:8080/20802
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Michael Smith <mi...@cloudera.com>
---
 bin/dump_breakpad_symbols.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/bin/dump_breakpad_symbols.py b/bin/dump_breakpad_symbols.py
index 81bf00d54..da8485bd4 100755
--- a/bin/dump_breakpad_symbols.py
+++ b/bin/dump_breakpad_symbols.py
@@ -56,8 +56,8 @@
 from __future__ import absolute_import, division, print_function
 import errno
 import logging
-import glob
 import magic
+import multiprocessing
 import os
 import shutil
 import subprocess
@@ -66,6 +66,7 @@ import tempfile
 
 from argparse import ArgumentParser
 from collections import namedtuple
+from multiprocessing.pool import ThreadPool
 
 BinarySymbolInfo = namedtuple('BinarySymbolInfo', 'path, debug_path')
 
@@ -137,6 +138,8 @@ def parse_args():
   parser.add_argument('-s', '--symbol_pkg', '--debuginfo_rpm', help="""RPM/DEB file
       containing the debug symbols matching the binaries in -r""")
   parser.add_argument('--objcopy', help='Path to the objcopy binary from Binutils')
+  parser.add_argument('--num_processes', type=int, default=multiprocessing.cpu_count(),
+      help="Number of parallel processes to use.")
   args = parser.parse_args()
 
   # Post processing checks
@@ -341,9 +344,20 @@ def main():
   assert objcopy
   status = 0
   ensure_dir_exists(args.dest_dir)
-  for binary in enumerate_binaries(args):
-    if not process_binary(dump_syms, objcopy, binary, args.dest_dir):
+  # Use a thread pool to go parallel
+  thread_pool = ThreadPool(processes=args.num_processes)
+
+  def processing_fn(binary):
+    return process_binary(dump_syms, objcopy, binary, args.dest_dir)
+
+  for result in thread_pool.imap_unordered(processing_fn, enumerate_binaries(args)):
+    if not result:
+      thread_pool.terminate()
       status = 1
+      break
+
+  thread_pool.close()
+  thread_pool.join()
   sys.exit(status)