Posted to commits@kudu.apache.org by mp...@apache.org on 2019/01/04 19:03:46 UTC

[2/2] kudu git commit: build: Factor dependency extraction code from dist_test into a python library

build: Factor dependency extraction code from dist_test into a python library

This will allow us to reuse this dependency extraction logic when
creating minicluster test binary artifacts that ship their dependencies.

There are no functional changes in this patch.

I ran dist-test a few times and everything seems to work fine:

 - http://dist-test.cloudera.org/job?job_id=mpercy.1546559376.69253 (run)
 - http://dist-test.cloudera.org/job?job_id=mpercy.1546559905.73493 (loop)

Change-Id: I0b4cbfceb053c61dbb1f1d16716acc8926987af2
Reviewed-on: http://gerrit.cloudera.org:8080/12153
Tested-by: Mike Percy <mp...@apache.org>
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d4481c03
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d4481c03
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d4481c03

Branch: refs/heads/master
Commit: d4481c03c34172bcd44f80b98112a8bed4956844
Parents: 83b70f8
Author: Mike Percy <mp...@apache.org>
Authored: Sat Nov 3 21:33:11 2018 -0500
Committer: Mike Percy <mp...@apache.org>
Committed: Fri Jan 4 19:03:16 2019 +0000

----------------------------------------------------------------------
 build-support/dep_extract.py | 119 ++++++++++++++++++++++++++++++++++++++
 build-support/dist_test.py   |  80 +++++++------------------
 2 files changed, 140 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/d4481c03/build-support/dep_extract.py
----------------------------------------------------------------------
diff --git a/build-support/dep_extract.py b/build-support/dep_extract.py
new file mode 100644
index 0000000..ba454b6
--- /dev/null
+++ b/build-support/dep_extract.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+import os
+import re
+import subprocess
+
+# Matches the output lines from the 'ldd' tool. For example:
+#   libcrypto.so.10 => /path/to/usr/lib64/libcrypto.so.10 (0x00007fb0cb0a5000)
+#
+# Note: this pattern will not match the following two types of
+# dependencies, so they will not be included in this module's output:
+#
+# 1. The dynamic linker:
+#    /lib64/ld-linux-x86-64.so.2 (0x00007f6f7ab79000)
+# 2. Linux virtual dynamic shared objects:
+#    linux-vdso.so.1 (0x00007ffc06cfb000)
+#
+LDD_RE = re.compile(r'^\s+.+? => (\S+) \(0x.+\)')
+
+class DependencyExtractor(object):
+  """
+  This class extracts native library dependencies from the given executable.
+  """
+  def __init__(self):
+    self.deps_cache = {}
+    self.lib_allowed_filter = lambda path: True
+    self.enable_expand_symlinks = False
+
+  def set_library_filter(self, lib_allowed_filter):
+    """
+    Specify a filter predicate that should return True iff the specified
+    library path should be included in the result from extract_deps().
+    By default, all libraries are included in the result.
+    """
+    self.lib_allowed_filter = lib_allowed_filter
+
+  def set_expand_symlinks(self, expand):
+    """
+    Specify whether symlinks should be expanded in the output from
+    extract_deps(). By default, symlinks are not expanded. See
+    expand_symlinks().
+    """
+    self.enable_expand_symlinks = expand
+
+  def expand_symlinks(self, deps):
+    """
+    ldd will often point to symlinks. Return a list including any symlink in
+    the specified dependency list as well as whatever it's pointing to,
+    recursively.
+    """
+    expanded = []
+    for path in deps:
+      expanded.append(path)
+      while os.path.islink(path):
+        # TODO(mpercy): os.readlink() can return an absolute path. Should we more carefully handle
+        # the path concatenation here?
+        path = os.path.join(os.path.dirname(path), os.readlink(path))
+        expanded.append(path)
+    return expanded
+
+  def extract_deps(self, exe):
+    """
+    Runs 'ldd' on the provided 'exe' path, returning a list of
+    any libraries it depends on. Libraries rejected by the
+    configured filter predicate are removed from this list.
+
+    If the provided 'exe' is not a binary executable, returns
+    an empty list.
+    """
+    if (exe.endswith(".jar") or
+        exe.endswith(".pl") or
+        exe.endswith(".py") or
+        exe.endswith(".sh") or
+        exe.endswith(".txt") or
+        os.path.isdir(exe)):
+      return []
+
+    if exe not in self.deps_cache:
+      p = subprocess.Popen(["ldd", exe], stdout=subprocess.PIPE)
+      out, err = p.communicate()
+      self.deps_cache[exe] = (out, err, p.returncode)
+
+    out, err, rc = self.deps_cache[exe]
+    if rc != 0:
+      logging.warning("failed to run ldd on %s", exe)
+      return []
+
+    deps = []
+    for line in out.splitlines():
+      match = LDD_RE.match(line)
+      if not match:
+        continue
+      dep = match.group(1)
+      # Apply the provided predicate.
+      if not self.lib_allowed_filter(dep):
+        continue
+      deps.append(dep)
+
+    if self.enable_expand_symlinks:
+      deps = self.expand_symlinks(deps)
+    return deps
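
The new module carries no dist_test-specific logic, so other build tooling
(such as the minicluster artifact packaging mentioned above) can drive it
directly. A minimal usage sketch, assuming a Kudu build tree; the binary
path and the filter predicate below are illustrative, not part of this
patch:

  from dep_extract import DependencyExtractor

  def allow_lib(path):
    # Mirror dist_test.py: skip libraries provided by the base OS image.
    return not (path.startswith("/lib") or path.startswith("/usr"))

  extractor = DependencyExtractor()
  extractor.set_library_filter(allow_lib)
  # Include each symlink in the chain as well as its target, so the
  # on-disk layout can be reproduced when the deps are archived.
  extractor.set_expand_symlinks(True)

  # Illustrative executable path; any native binary works.
  for dep in extractor.extract_deps("build/latest/bin/kudu-tserver"):
    print dep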

http://git-wip-us.apache.org/repos/asf/kudu/blob/d4481c03/build-support/dist_test.py
----------------------------------------------------------------------
diff --git a/build-support/dist_test.py b/build-support/dist_test.py
index 5a28083..01671ce 100755
--- a/build-support/dist_test.py
+++ b/build-support/dist_test.py
@@ -39,6 +39,7 @@ import shutil
 import subprocess
 import time
 
+from dep_extract import DependencyExtractor
 from kudu_util import init_logging
 
 TEST_TIMEOUT_SECS = int(os.environ.get('TEST_TIMEOUT_SECS', '900'))
@@ -76,10 +77,6 @@ TEST_COMMAND_RE = re.compile('Test command: (.+)$')
 #  262:  GTEST_TOTAL_SHARDS=1
 TEST_ENV_RE = re.compile('^\d+:  (\S+)=(.+)')
 
-# Matches the output lines of 'ldd'. For example:
-#   libcrypto.so.10 => /path/to/usr/lib64/libcrypto.so.10 (0x00007fb0cb0a5000)
-LDD_RE = re.compile(r'^\s+.+? => (\S+) \(0x.+\)')
-
 DEPS_FOR_ALL = \
     ["build-support/stacktrace_addr2line.pl",
      "build-support/run-test.sh",
@@ -219,14 +216,21 @@ def get_test_executions(tests_regex, extra_args=None):
   return execs
 
 
-def is_lib_blacklisted(lib):
+def is_lib_whitelisted(lib):
   # No need to ship things like libc, libstdcxx, etc.
   if lib.startswith("/lib") or lib.startswith("/usr"):
-    return True
-  return False
+    return False
+  return True
+
+
+def create_dependency_extractor():
+  dep_extractor = DependencyExtractor()
+  dep_extractor.set_library_filter(is_lib_whitelisted)
+  dep_extractor.set_expand_symlinks(True)
+  return dep_extractor
 
 
-def get_base_deps():
+def get_base_deps(dep_extractor):
   deps = []
   for d in DEPS_FOR_ALL:
     d = os.path.realpath(rel_to_abs(d))
@@ -236,7 +240,7 @@ def get_base_deps():
     # DEPS_FOR_ALL may include binaries whose dependencies are not dependencies
     # of the test executable. We must include those dependencies in the archive
     # for the binaries to be usable.
-    deps.extend(ldd_deps(d))
+    deps.extend(dep_extractor.extract_deps(d))
   return deps
 
 
@@ -265,51 +269,7 @@ def copy_system_library(lib):
     shutil.copy2(rel_to_abs(lib), dst)
   return dst
 
-LDD_CACHE={}
-def ldd_deps(exe):
-  """
-  Runs 'ldd' on the provided 'exe' path, returning a list of
-  any libraries it depends on. Blacklisted libraries are
-  removed from this list.
-
-  If the provided 'exe' is not a binary executable, returns
-  an empty list.
-  """
-  if (exe.endswith(".jar") or
-      exe.endswith(".pl") or
-      exe.endswith(".py") or
-      exe.endswith(".sh") or
-      exe.endswith(".txt") or
-      os.path.isdir(exe)):
-    return []
-  if exe not in LDD_CACHE:
-    p = subprocess.Popen(["ldd", exe], stdout=subprocess.PIPE)
-    out, err = p.communicate()
-    LDD_CACHE[exe] = (out, err, p.returncode)
-  out, err, rc = LDD_CACHE[exe]
-  if rc != 0:
-    logging.warning("failed to run ldd on %s", exe)
-    return []
-  ret = []
-  for l in out.splitlines():
-    m = LDD_RE.match(l)
-    if not m:
-      continue
-    lib = m.group(1)
-    if is_lib_blacklisted(lib):
-      continue
-    path = m.group(1)
-    ret.append(m.group(1))
-
-    # ldd will often point to symlinks. We need to upload the symlink
-    # as well as whatever it's pointing to, recursively.
-    while os.path.islink(path):
-      path = os.path.join(os.path.dirname(path), os.readlink(path))
-      ret.append(path)
-  return ret
-
-
-def create_archive_input(staging, execution,
+def create_archive_input(staging, execution, dep_extractor,
                          collect_tmpdir=False):
   """
   Generates .gen.json and .isolate files corresponding to the
@@ -325,8 +285,8 @@ def create_archive_input(staging, execution,
   argv[1] = rel_test_exe
   files = []
   files.append(rel_test_exe)
-  deps = ldd_deps(abs_test_exe)
-  deps.extend(get_base_deps())
+  deps = dep_extractor.extract_deps(abs_test_exe)
+  deps.extend(get_base_deps(dep_extractor))
 
   # Deduplicate dependencies included via DEPS_FOR_ALL.
   for d in set(deps):
@@ -480,8 +440,9 @@ def run_tests(parser, options):
     for e in executions:
       e.argv.extend(options.extra_args)
   staging = StagingDir.new()
+  dep_extractor = create_dependency_extractor()
   for execution in executions:
-    create_archive_input(staging, execution,
+    create_archive_input(staging, execution, dep_extractor,
                          collect_tmpdir=options.collect_tmpdir)
   run_isolate(staging)
   retry_all = RETRY_ALL_TESTS > 0
@@ -545,8 +506,9 @@ def loop_test(parser, options):
     e = executions[0]
     e.env["GTEST_TOTAL_SHARDS"] = 1
     e.env["GTEST_SHARD_INDEX"] = 0
+  dep_extractor = create_dependency_extractor()
   for execution in executions:
-    create_archive_input(staging, execution,
+    create_archive_input(staging, execution, dep_extractor,
                          collect_tmpdir=options.collect_tmpdir)
   run_isolate(staging)
   create_task_json(staging, options.num_instances)
@@ -616,7 +578,7 @@ def add_java_subparser(subparsers):
 
 
 def dump_base_deps(parser, options):
-  print json.dumps(get_base_deps())
+  print json.dumps(get_base_deps(create_dependency_extractor()))
 
 
 def add_internal_commands(subparsers):
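
Note the inversion from is_lib_blacklisted to is_lib_whitelisted above: the
predicate now answers "should this library be shipped?", which matches the
contract expected by DependencyExtractor.set_library_filter(). A quick
sanity check of the new semantics (the paths are illustrative):

  assert not is_lib_whitelisted("/usr/lib64/libcrypto.so.10")  # system lib, skipped
  assert not is_lib_whitelisted("/lib64/libz.so.1")            # system lib, skipped
  assert is_lib_whitelisted("/home/user/kudu/build/lib/libkudu_util.so")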