You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pc...@apache.org on 2018/07/04 20:08:43 UTC
[arrow] branch master updated: ARROW-2657: [Python] Import
TensorFlow python extension before pyarrow to avoid segfault
This is an automated email from the ASF dual-hosted git repository.
pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new e7aaf7b ARROW-2657: [Python] Import TensorFlow python extension before pyarrow to avoid segfault
e7aaf7b is described below
commit e7aaf7bf3d3e326b5fe58d20f8fc45b5cec01cac
Author: Philipp Moritz <pc...@gmail.com>
AuthorDate: Wed Jul 4 13:08:34 2018 -0700
ARROW-2657: [Python] Import TensorFlow python extension before pyarrow to avoid segfault
Author: Philipp Moritz <pc...@gmail.com>
Author: Wes McKinney <we...@apache.org>
Closes #2210 from pcmoritz/try-fixing-tf-crash and squashes the following commits:
92aef7a9 <Wes McKinney> Use compat namespace to avoid adding import_tensorflow_extension to pyarrow.* namespace
2ca3de9f <Philipp Moritz> clarify comment
70f3bcaa <Philipp Moritz> workaround for virtualenv
bbf6cfc3 <Philipp Moritz> load TensorFlow for sure if it exists
c18cccb6 <Philipp Moritz> address comments
ac38837d <Philipp Moritz> add clarification comment
1135b51b <Philipp Moritz> silence tensorflow installation
57ca5fc2 <Philipp Moritz> install conda to test wheels outside of docker
02cb5005 <Philipp Moritz> tests if the wheels work with tensorflow
7835fba1 <Philipp Moritz> check for linux
1c9628f1 <Philipp Moritz> try fixing tensorflow crash
---
ci/travis_script_manylinux.sh | 18 ++++++++++++++++++
python/pyarrow/__init__.py | 7 +++++++
python/pyarrow/compat.py | 42 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 67 insertions(+)
diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh
index 14e6404..9ea15e7 100755
--- a/ci/travis_script_manylinux.sh
+++ b/ci/travis_script_manylinux.sh
@@ -24,3 +24,21 @@ pushd python/manylinux1
git clone ../../ arrow
docker build -t arrow-base-x86_64 -f Dockerfile-x86_64 .
docker run --shm-size=2g --rm -e PYARROW_PARALLEL=3 -v $PWD:/io arrow-base-x86_64 /io/build_arrow.sh
+
+# Testing for https://issues.apache.org/jira/browse/ARROW-2657
+# These tests cannot be run inside of the docker container, since TensorFlow
+# does not run on manylinux1
+
+source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
+
+source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
+
+PYTHON_VERSION=3.6
+CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION
+
+conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION
+source activate $CONDA_ENV_DIR
+
+pip install -q tensorflow
+pip install "dist/`ls dist/ | grep cp36`"
+python -c "import pyarrow; import tensorflow"
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 20254c2..dc045e6 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -44,6 +44,13 @@ except DistributionNotFound:
__version__ = None
+import pyarrow.compat as compat
+
+
+# Workaround for https://issues.apache.org/jira/browse/ARROW-2657
+compat.import_tensorflow_extension()
+
+
from pyarrow.lib import cpu_count, set_cpu_count
from pyarrow.lib import (null, bool_,
int8, int16, int32, int64,
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index 1b19ca0..1fcaf4c 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -160,6 +160,48 @@ def encode_file_path(path):
# will convert utf8 to utf16
return encoded_path
+def import_tensorflow_extension():
+ """
+ Load the TensorFlow extension if it exists.
+
+ This is used to load the TensorFlow extension before
+ pyarrow.lib. If we don't do this there are symbol clashes
+ between TensorFlow's use of threading and our global
+ thread pool, see also
+ https://issues.apache.org/jira/browse/ARROW-2657 and
+ https://github.com/apache/arrow/pull/2096.
+ """
+ import os
+ import site
+ tensorflow_loaded = False
+
+ # Try to load the tensorflow extension directly
+ # This is a performance optimization; tensorflow will always be
+ # loaded via the "import tensorflow" statement below if this
+ # doesn't succeed.
+ try:
+ site_paths = site.getsitepackages() + [site.getusersitepackages()]
+ except AttributeError:
+ # Workaround for https://github.com/pypa/virtualenv/issues/228,
+ # this happens in some configurations of virtualenv
+ site_paths = [os.path.dirname(site.__file__) + '/site-packages']
+ for site_path in site_paths:
+ ext = os.path.join(site_path, "tensorflow",
+ "libtensorflow_framework.so")
+ if os.path.exists(ext):
+ import ctypes
+ ctypes.CDLL(ext)
+ tensorflow_loaded = True
+ break
+
+ # If the above failed, try to load tensorflow the normal way
+ # (this is more expensive)
+ if not tensorflow_loaded:
+ try:
+ import tensorflow
+ except ImportError:
+ pass
+
integer_types = six.integer_types + (np.integer,)