You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pc...@apache.org on 2018/07/04 20:08:43 UTC

[arrow] branch master updated: ARROW-2657: [Python] Import TensorFlow python extension before pyarrow to avoid segfault

This is an automated email from the ASF dual-hosted git repository.

pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e7aaf7b  ARROW-2657: [Python] Import TensorFlow python extension before pyarrow to avoid segfault
e7aaf7b is described below

commit e7aaf7bf3d3e326b5fe58d20f8fc45b5cec01cac
Author: Philipp Moritz <pc...@gmail.com>
AuthorDate: Wed Jul 4 13:08:34 2018 -0700

    ARROW-2657: [Python] Import TensorFlow python extension before pyarrow to avoid segfault
    
    Author: Philipp Moritz <pc...@gmail.com>
    Author: Wes McKinney <we...@apache.org>
    
    Closes #2210 from pcmoritz/try-fixing-tf-crash and squashes the following commits:
    
    92aef7a9 <Wes McKinney> Use compat namespace to avoid adding import_tensorflow_extension to pyarrow.* namespace
    2ca3de9f <Philipp Moritz> clarify comment
    70f3bcaa <Philipp Moritz> workaround for virtualenv
    bbf6cfc3 <Philipp Moritz> load TensorFlow for sure if it exists
    c18cccb6 <Philipp Moritz> address comments
    ac38837d <Philipp Moritz> add clarification comment
    1135b51b <Philipp Moritz> silence tensorflow installation
    57ca5fc2 <Philipp Moritz> install conda to test wheels outside of docker
    02cb5005 <Philipp Moritz> tests if the wheels work with tensorflow
    7835fba1 <Philipp Moritz> check for linux
    1c9628f1 <Philipp Moritz> try fixing tensorflow crash
---
 ci/travis_script_manylinux.sh | 18 ++++++++++++++++++
 python/pyarrow/__init__.py    |  7 +++++++
 python/pyarrow/compat.py      | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+)

diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh
index 14e6404..9ea15e7 100755
--- a/ci/travis_script_manylinux.sh
+++ b/ci/travis_script_manylinux.sh
@@ -24,3 +24,21 @@ pushd python/manylinux1
 git clone ../../ arrow
 docker build -t arrow-base-x86_64 -f Dockerfile-x86_64 .
 docker run --shm-size=2g --rm -e PYARROW_PARALLEL=3 -v $PWD:/io arrow-base-x86_64 /io/build_arrow.sh
+
+# Testing for https://issues.apache.org/jira/browse/ARROW-2657
+# These tests cannot be run inside of the docker container, since TensorFlow
+# does not run on manylinux1
+
+source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
+
+source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
+
+PYTHON_VERSION=3.6
+CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION
+
+conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION
+source activate $CONDA_ENV_DIR
+
+pip install -q tensorflow
+pip install "dist/`ls dist/ | grep cp36`"
+python -c "import pyarrow; import tensorflow"
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 20254c2..dc045e6 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -44,6 +44,13 @@ except DistributionNotFound:
         __version__ = None
 
 
+import pyarrow.compat as compat
+
+
+# Workaround for https://issues.apache.org/jira/browse/ARROW-2657
+compat.import_tensorflow_extension()
+
+
 from pyarrow.lib import cpu_count, set_cpu_count
 from pyarrow.lib import (null, bool_,
                          int8, int16, int32, int64,
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index 1b19ca0..1fcaf4c 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -160,6 +160,48 @@ def encode_file_path(path):
     # will convert utf8 to utf16
     return encoded_path
 
+def import_tensorflow_extension():
+    """
+    Load the TensorFlow extension if it exists.
+
+    This is used to load the TensorFlow extension before
+    pyarrow.lib. If we don't do this there are symbol clashes
+    between TensorFlow's use of threading and our global
+    thread pool, see also
+    https://issues.apache.org/jira/browse/ARROW-2657 and
+    https://github.com/apache/arrow/pull/2096.
+
+    This is a best-effort workaround and returns None. A missing
+    TensorFlow installation, or a TensorFlow shared library that
+    exists but fails to load, is ignored so that importing pyarrow
+    keeps working.
+    """
+    import os
+    import site
+    tensorflow_loaded = False
+
+    # Try to load the tensorflow extension directly
+    # This is a performance optimization, tensorflow will always be
+    # loaded via the "import tensorflow" statement below if this
+    # doesn't succeed.
+    try:
+        site_paths = site.getsitepackages() + [site.getusersitepackages()]
+    except AttributeError:
+        # Workaround for https://github.com/pypa/virtualenv/issues/228,
+        # this happens in some configurations of virtualenv
+        site_paths = [os.path.join(os.path.dirname(site.__file__),
+                                   'site-packages')]
+    for site_path in site_paths:
+        ext = os.path.join(site_path, "tensorflow",
+                           "libtensorflow_framework.so")
+        if os.path.exists(ext):
+            import ctypes
+            try:
+                ctypes.CDLL(ext)
+            except OSError:
+                # The library is present but cannot be loaded (e.g. wrong
+                # architecture or missing dependencies). Do not let that
+                # break "import pyarrow"; try the next candidate instead.
+                continue
+            tensorflow_loaded = True
+            break
+
+    # If the above failed, try to load tensorflow the normal way
+    # (this is more expensive)
+    if not tensorflow_loaded:
+        try:
+            import tensorflow  # noqa: F401 (imported for its side effect)
+        except ImportError:
+            pass
+
 
 integer_types = six.integer_types + (np.integer,)