Posted to commits@kudu.apache.org by mp...@apache.org on 2019/01/04 19:03:46 UTC
[2/2] kudu git commit: build: Factor dependency extraction code from dist_test into a python library
build: Factor dependency extraction code from dist_test into a python library
This will allow us to reuse this dependency extraction logic when
creating minicluster test binary artifacts that ship their dependencies.
There are no functional changes in this patch.
I ran dist-test a few times and everything seems to work fine:
- http://dist-test.cloudera.org/job?job_id=mpercy.1546559376.69253 (run)
- http://dist-test.cloudera.org/job?job_id=mpercy.1546559905.73493 (loop)
Change-Id: I0b4cbfceb053c61dbb1f1d16716acc8926987af2
Reviewed-on: http://gerrit.cloudera.org:8080/12153
Tested-by: Mike Percy <mp...@apache.org>
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d4481c03
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d4481c03
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d4481c03
Branch: refs/heads/master
Commit: d4481c03c34172bcd44f80b98112a8bed4956844
Parents: 83b70f8
Author: Mike Percy <mp...@apache.org>
Authored: Sat Nov 3 21:33:11 2018 -0500
Committer: Mike Percy <mp...@apache.org>
Committed: Fri Jan 4 19:03:16 2019 +0000
----------------------------------------------------------------------
build-support/dep_extract.py | 119 ++++++++++++++++++++++++++++++++++++++
build-support/dist_test.py | 80 +++++++------------------
2 files changed, 140 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
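For context, the new dep_extract module introduced below is consumed by dist_test.py roughly as follows. This is a minimal sketch based on the callers in the diff; the test executable path is invented for illustration:

    from dep_extract import DependencyExtractor

    def is_lib_whitelisted(lib):
      # Mirrors dist_test.py: system libraries under /lib or /usr (libc,
      # libstdc++, etc.) do not need to be shipped in the test archive.
      return not (lib.startswith("/lib") or lib.startswith("/usr"))

    dep_extractor = DependencyExtractor()
    dep_extractor.set_library_filter(is_lib_whitelisted)
    dep_extractor.set_expand_symlinks(True)

    # Hypothetical path; dist_test.py passes the absolute path of each test
    # executable plus the entries in DEPS_FOR_ALL.
    deps = dep_extractor.extract_deps("/path/to/build/latest/bin/foo-test")

A single extractor instance is shared across all test executions, so repeated binaries hit the internal deps_cache instead of re-running ldd.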
http://git-wip-us.apache.org/repos/asf/kudu/blob/d4481c03/build-support/dep_extract.py
----------------------------------------------------------------------
diff --git a/build-support/dep_extract.py b/build-support/dep_extract.py
new file mode 100644
index 0000000..ba454b6
--- /dev/null
+++ b/build-support/dep_extract.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+import os
+import re
+import subprocess
+
+# Matches the output lines from the 'ldd' tool. For example:
+# libcrypto.so.10 => /path/to/usr/lib64/libcrypto.so.10 (0x00007fb0cb0a5000)
+#
+# Note: The following pattern will not match the following two types of
+# dependencies and so they will not be included in the output from this module:
+#
+# 1. The dynamic linker:
+# /lib64/ld-linux-x86-64.so.2 (0x00007f6f7ab79000)
+# 2. Linux virtual dynamic shared objects:
+# linux-vdso.so.1 (0x00007ffc06cfb000)
+#
+LDD_RE = re.compile(r'^\s+.+? => (\S+) \(0x.+\)')
+
+class DependencyExtractor(object):
+ """
+ This class extracts native library dependencies from the given executable.
+ """
+ def __init__(self):
+ self.deps_cache = {}
+ self.lib_allowed_filter = lambda path: True
+ self.enable_expand_symlinks = False
+
+ def set_library_filter(self, lib_allowed_filter):
+ """
+ Specify a filter predicate that should return True iff the specified
+ library path should be included in the result from extract_deps().
+ By default, all libraries are included in the result.
+ """
+ self.lib_allowed_filter = lib_allowed_filter
+
+ def set_expand_symlinks(self, expand):
+ """
+ Specify whether symlinks should be expanded in the output from
+ extract_deps(). By default, symlinks are not expanded. See
+ expand_symlinks().
+ """
+ self.enable_expand_symlinks = expand
+
+ def expand_symlinks(self, deps):
+ """
+ ldd will often point to symlinks. Return a list including any symlink in
+ the specified dependency list as well as whatever it's pointing to,
+ recursively.
+ """
+ expanded = []
+ for path in deps:
+ expanded.append(path)
+ while os.path.islink(path):
+ # TODO(mpercy): os.readlink() can return an absolute path. Should we more carefully handle
+ # the path concatenation here?
+ path = os.path.join(os.path.dirname(path), os.readlink(path))
+ expanded.append(path)
+ return expanded
+
+ def extract_deps(self, exe):
+ """
+ Runs 'ldd' on the provided 'exe' path, returning a list of
+ any libraries it depends on. Libraries rejected by the
+ configured library filter are removed from this list.
+
+ If the provided 'exe' is not a binary executable, returns
+ an empty list.
+ """
+ if (exe.endswith(".jar") or
+ exe.endswith(".pl") or
+ exe.endswith(".py") or
+ exe.endswith(".sh") or
+ exe.endswith(".txt") or
+ os.path.isdir(exe)):
+ return []
+
+ if exe not in self.deps_cache:
+ p = subprocess.Popen(["ldd", exe], stdout=subprocess.PIPE)
+ out, err = p.communicate()
+ self.deps_cache[exe] = (out, err, p.returncode)
+
+ out, err, rc = self.deps_cache[exe]
+ if rc != 0:
+ logging.warning("failed to run ldd on %s", exe)
+ return []
+
+ deps = []
+ for line in out.splitlines():
+ match = LDD_RE.match(line)
+ if not match:
+ continue
+ dep = match.group(1)
+ # Apply the provided predicate.
+ if not self.lib_allowed_filter(dep):
+ continue
+ deps.append(dep)
+
+ if self.enable_expand_symlinks:
+ deps = self.expand_symlinks(deps)
+ return deps
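To make the symlink handling in expand_symlinks() concrete, a small sketch (the library paths here are hypothetical):

    from dep_extract import DependencyExtractor

    extractor = DependencyExtractor()
    # expand_symlinks() keeps each symlink and appends every target along its
    # chain, so the result might look like:
    #   ['/usr/lib64/libcrypto.so.10', '/usr/lib64/libcrypto.so.1.0.2k']
    expanded = extractor.expand_symlinks(["/usr/lib64/libcrypto.so.10"])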
http://git-wip-us.apache.org/repos/asf/kudu/blob/d4481c03/build-support/dist_test.py
----------------------------------------------------------------------
diff --git a/build-support/dist_test.py b/build-support/dist_test.py
index 5a28083..01671ce 100755
--- a/build-support/dist_test.py
+++ b/build-support/dist_test.py
@@ -39,6 +39,7 @@ import shutil
import subprocess
import time
+from dep_extract import DependencyExtractor
from kudu_util import init_logging
TEST_TIMEOUT_SECS = int(os.environ.get('TEST_TIMEOUT_SECS', '900'))
@@ -76,10 +77,6 @@ TEST_COMMAND_RE = re.compile('Test command: (.+)$')
# 262: GTEST_TOTAL_SHARDS=1
TEST_ENV_RE = re.compile('^\d+: (\S+)=(.+)')
-# Matches the output lines of 'ldd'. For example:
-# libcrypto.so.10 => /path/to/usr/lib64/libcrypto.so.10 (0x00007fb0cb0a5000)
-LDD_RE = re.compile(r'^\s+.+? => (\S+) \(0x.+\)')
-
DEPS_FOR_ALL = \
["build-support/stacktrace_addr2line.pl",
"build-support/run-test.sh",
@@ -219,14 +216,21 @@ def get_test_executions(tests_regex, extra_args=None):
return execs
-def is_lib_blacklisted(lib):
+def is_lib_whitelisted(lib):
# No need to ship things like libc, libstdcxx, etc.
if lib.startswith("/lib") or lib.startswith("/usr"):
- return True
- return False
+ return False
+ return True
+
+
+def create_dependency_extractor():
+ dep_extractor = DependencyExtractor()
+ dep_extractor.set_library_filter(is_lib_whitelisted)
+ dep_extractor.set_expand_symlinks(True)
+ return dep_extractor
-def get_base_deps():
+def get_base_deps(dep_extractor):
deps = []
for d in DEPS_FOR_ALL:
d = os.path.realpath(rel_to_abs(d))
@@ -236,7 +240,7 @@ def get_base_deps():
# DEPS_FOR_ALL may include binaries whose dependencies are not dependencies
# of the test executable. We must include those dependencies in the archive
# for the binaries to be usable.
- deps.extend(ldd_deps(d))
+ deps.extend(dep_extractor.extract_deps(d))
return deps
@@ -265,51 +269,7 @@ def copy_system_library(lib):
shutil.copy2(rel_to_abs(lib), dst)
return dst
-LDD_CACHE={}
-def ldd_deps(exe):
- """
- Runs 'ldd' on the provided 'exe' path, returning a list of
- any libraries it depends on. Blacklisted libraries are
- removed from this list.
-
- If the provided 'exe' is not a binary executable, returns
- an empty list.
- """
- if (exe.endswith(".jar") or
- exe.endswith(".pl") or
- exe.endswith(".py") or
- exe.endswith(".sh") or
- exe.endswith(".txt") or
- os.path.isdir(exe)):
- return []
- if exe not in LDD_CACHE:
- p = subprocess.Popen(["ldd", exe], stdout=subprocess.PIPE)
- out, err = p.communicate()
- LDD_CACHE[exe] = (out, err, p.returncode)
- out, err, rc = LDD_CACHE[exe]
- if rc != 0:
- logging.warning("failed to run ldd on %s", exe)
- return []
- ret = []
- for l in out.splitlines():
- m = LDD_RE.match(l)
- if not m:
- continue
- lib = m.group(1)
- if is_lib_blacklisted(lib):
- continue
- path = m.group(1)
- ret.append(m.group(1))
-
- # ldd will often point to symlinks. We need to upload the symlink
- # as well as whatever it's pointing to, recursively.
- while os.path.islink(path):
- path = os.path.join(os.path.dirname(path), os.readlink(path))
- ret.append(path)
- return ret
-
-
-def create_archive_input(staging, execution,
+def create_archive_input(staging, execution, dep_extractor,
collect_tmpdir=False):
"""
Generates .gen.json and .isolate files corresponding to the
@@ -325,8 +285,8 @@ def create_archive_input(staging, execution,
argv[1] = rel_test_exe
files = []
files.append(rel_test_exe)
- deps = ldd_deps(abs_test_exe)
- deps.extend(get_base_deps())
+ deps = dep_extractor.extract_deps(abs_test_exe)
+ deps.extend(get_base_deps(dep_extractor))
# Deduplicate dependencies included via DEPS_FOR_ALL.
for d in set(deps):
@@ -480,8 +440,9 @@ def run_tests(parser, options):
for e in executions:
e.argv.extend(options.extra_args)
staging = StagingDir.new()
+ dep_extractor = create_dependency_extractor()
for execution in executions:
- create_archive_input(staging, execution,
+ create_archive_input(staging, execution, dep_extractor,
collect_tmpdir=options.collect_tmpdir)
run_isolate(staging)
retry_all = RETRY_ALL_TESTS > 0
@@ -545,8 +506,9 @@ def loop_test(parser, options):
e = executions[0]
e.env["GTEST_TOTAL_SHARDS"] = 1
e.env["GTEST_SHARD_INDEX"] = 0
+ dep_extractor = create_dependency_extractor()
for execution in executions:
- create_archive_input(staging, execution,
+ create_archive_input(staging, execution, dep_extractor,
collect_tmpdir=options.collect_tmpdir)
run_isolate(staging)
create_task_json(staging, options.num_instances)
@@ -616,7 +578,7 @@ def add_java_subparser(subparsers):
def dump_base_deps(parser, options):
- print json.dumps(get_base_deps())
+ print json.dumps(get_base_deps(create_dependency_extractor()))
def add_internal_commands(subparsers):