You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2024/01/08 19:06:39 UTC
(impala) 04/04: IMPALA-10048: Go parallel for dump_breakpad_symbols.py
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3bcd770dfc51ed5148cf85a96f5644594f953319
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sun Jun 25 21:20:25 2023 -0700
IMPALA-10048: Go parallel for dump_breakpad_symbols.py
This modifies dump_breakpad_symbols.py to use a ThreadPool
to go parallel when there are multiple binaries or
libraries to process. This is common for Jenkins jobs that
dump symbols for all backend tests. The different binaries
write out to different directories, so the threads don't
interfere with each other.
Testing:
- Ran locally dumping the symbols for all backend tests
- Ran a Jenkins job that generates a minidump and triggers
the minidump symbol processing. The symbol processing ran
in parallel and completed successfully.
Change-Id: I93427bb07f1d9718bd6df90acfd247210b54294d
Reviewed-on: http://gerrit.cloudera.org:8080/20802
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Michael Smith <mi...@cloudera.com>
---
bin/dump_breakpad_symbols.py | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/bin/dump_breakpad_symbols.py b/bin/dump_breakpad_symbols.py
index 81bf00d54..da8485bd4 100755
--- a/bin/dump_breakpad_symbols.py
+++ b/bin/dump_breakpad_symbols.py
@@ -56,8 +56,8 @@
from __future__ import absolute_import, division, print_function
import errno
import logging
-import glob
import magic
+import multiprocessing
import os
import shutil
import subprocess
@@ -66,6 +66,7 @@ import tempfile
from argparse import ArgumentParser
from collections import namedtuple
+from multiprocessing.pool import ThreadPool
BinarySymbolInfo = namedtuple('BinarySymbolInfo', 'path, debug_path')
@@ -137,6 +138,8 @@ def parse_args():
parser.add_argument('-s', '--symbol_pkg', '--debuginfo_rpm', help="""RPM/DEB file
containing the debug symbols matching the binaries in -r""")
parser.add_argument('--objcopy', help='Path to the objcopy binary from Binutils')
+ parser.add_argument('--num_processes', type=int, default=multiprocessing.cpu_count(),
+ help="Number of parallel processes to use.")
args = parser.parse_args()
# Post processing checks
@@ -341,9 +344,20 @@ def main():
assert objcopy
status = 0
ensure_dir_exists(args.dest_dir)
- for binary in enumerate_binaries(args):
- if not process_binary(dump_syms, objcopy, binary, args.dest_dir):
+ # Use a thread pool to go parallel
+ thread_pool = ThreadPool(processes=args.num_processes)
+
+ def processing_fn(binary):
+ return process_binary(dump_syms, objcopy, binary, args.dest_dir)
+
+ for result in thread_pool.imap_unordered(processing_fn, enumerate_binaries(args)):
+ if not result:
+ thread_pool.terminate()
status = 1
+ break
+
+ thread_pool.close()
+ thread_pool.join()
sys.exit(status)