You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ro...@apache.org on 2019/02/28 23:17:55 UTC
[arrow] branch master updated: ARROW-4690: Building TensorFlow
compatible wheels for Arrow
This is an automated email from the ASF dual-hosted git repository.
robertnishihara pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b766bff ARROW-4690: Building TensorFlow compatible wheels for Arrow
b766bff is described below
commit b766bff34b7d85034d26cebef5b3aeef1eb2fd82
Author: Philipp Moritz <pc...@gmail.com>
AuthorDate: Thu Feb 28 15:17:42 2019 -0800
ARROW-4690: Building TensorFlow compatible wheels for Arrow
This includes a Dockerfile that can be used to create wheels based on Ubuntu 14.04, which are compatible with TensorFlow.
TODO before this can be merged:
- [x] write documentation on how to build this
- [x] do more testing
Author: Philipp Moritz <pc...@gmail.com>
Closes #3766 from pcmoritz/ubuntu-wheels and squashes the following commits:
f708c29b <Philipp Moritz> remove tensorflow import check
599ce2e7 <Philipp Moritz> fix manylinux1 build instructions
f1fbedf8 <Philipp Moritz> remove tensorflow hacks
bf47f579 <Philipp Moritz> improve wording
4fb1d38b <Philipp Moritz> add documentation
078be98b <Philipp Moritz> add licenses
0ab0bccb <Philipp Moritz> cleanup
c7ab1395 <Philipp Moritz> fix
eae775d5 <Philipp Moritz> update
2820363e <Philipp Moritz> update
ed683309 <Philipp Moritz> update
e8c96ecf <Philipp Moritz> update
8a3b19e8 <Philipp Moritz> update
0fcc3730 <Philipp Moritz> update
fd387797 <Philipp Moritz> update
78dcf42d <Philipp Moritz> update
7726bb6a <Philipp Moritz> update
82ae4828 <Philipp Moritz> update
f44082ea <Philipp Moritz> update
deb30bfd <Philipp Moritz> update
50e40320 <Philipp Moritz> update
58f6c121 <Philipp Moritz> update
5e8ca589 <Philipp Moritz> update
5fa73dd5 <Philipp Moritz> update
595d0fe1 <Philipp Moritz> update
79006722 <Philipp Moritz> add libffi-dev
9ff5236d <Philipp Moritz> update
ca972ad0 <Philipp Moritz> update
60805e22 <Philipp Moritz> update
7a66ba35 <Philipp Moritz> update
1b56d1f1 <Philipp Moritz> zlib
eedef794 <Philipp Moritz> update
3ae2b5ab <Philipp Moritz> update
df297e1c <Philipp Moritz> add python build script
358e4f85 <Philipp Moritz> update
65afcebe <Philipp Moritz> update
11ccfc7e <Philipp Moritz> update
f1784245 <Philipp Moritz> update
b3039c8b <Philipp Moritz> update
9064c3ca <Philipp Moritz> update
c39f92a9 <Philipp Moritz> install tensorflow
ec4e2210 <Philipp Moritz> unicode
773ca2b6 <Philipp Moritz> link python
b690d64a <Philipp Moritz> update
5ce7f0d6 <Philipp Moritz> update
a9302fce <Philipp Moritz> install python-dev
f12e0cfe <Philipp Moritz> multibuild python 2.7
9342006b <Philipp Moritz> add git
ab2ef8e7 <Philipp Moritz> fix cmake install
cef997b5 <Philipp Moritz> install cmake and ninja
5d560faf <Philipp Moritz> add build-essential
adf2f705 <Philipp Moritz> add curl
f8d66963 <Philipp Moritz> remove xz
e439356e <Philipp Moritz> apt update
79fe557e <Philipp Moritz> add docker image for ubuntu wheel
---
LICENSE.txt | 11 ++
ci/travis_script_manylinux.sh | 1 -
cpp/cmake_modules/FindFlatbuffers.cmake | 2 +-
python/manylinux1/Dockerfile-x86_64_ubuntu | 95 +++++++++++
python/manylinux1/README.md | 40 ++++-
python/manylinux1/build_arrow.sh | 32 ++--
python/manylinux1/scripts/build_python.sh | 218 ++++++++++++++++++++++++
python/manylinux1/scripts/python-tag-abi-tag.py | 30 ++++
python/manylinux1/scripts/requirements.txt | 34 ++++
python/pyarrow/__init__.py | 7 -
python/pyarrow/compat.py | 106 ------------
11 files changed, 448 insertions(+), 128 deletions(-)
diff --git a/LICENSE.txt b/LICENSE.txt
index 58d6671..582daa4 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -826,3 +826,14 @@ This project includes code from Snappy.
Copyright: 2009 Google Inc. All rights reserved.
Homepage: https://github.com/google/snappy
License: 3-clause BSD
+
+--------------------------------------------------------------------------------
+
+This project includes code from the manylinux project.
+
+* python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py,
+ requirements.txt} are based on code from the manylinux project.
+
+Copyright: 2016 manylinux
+Homepage: https://github.com/pypa/manylinux
+License: The MIT License (MIT)
diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh
index fa02c82..8f2bd88 100755
--- a/ci/travis_script_manylinux.sh
+++ b/ci/travis_script_manylinux.sh
@@ -35,7 +35,6 @@ import pyarrow
import pyarrow.orc
import pyarrow.parquet
import pyarrow.plasma
-import tensorflow
if sys.version_info.major > 2:
import pyarrow.gandiva
diff --git a/cpp/cmake_modules/FindFlatbuffers.cmake b/cpp/cmake_modules/FindFlatbuffers.cmake
index 975c869..f97286f 100644
--- a/cpp/cmake_modules/FindFlatbuffers.cmake
+++ b/cpp/cmake_modules/FindFlatbuffers.cmake
@@ -45,7 +45,7 @@ if ( _flatbuffers_roots )
PATH_SUFFIXES "include" )
find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers
PATHS "${_flatbuffers_roots}" NO_DEFAULT_PATH
- PATH_SUFFIXES "lib" "lib64")
+ PATH_SUFFIXES "lib" "lib64" "lib/x86_64-linux-gnu/")
else ()
find_path( FLATBUFFERS_INCLUDE_DIR NAMES flatbuffers/flatbuffers.h )
find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers )
diff --git a/python/manylinux1/Dockerfile-x86_64_ubuntu b/python/manylinux1/Dockerfile-x86_64_ubuntu
new file mode 100644
index 0000000..c2b5247
--- /dev/null
+++ b/python/manylinux1/Dockerfile-x86_64_ubuntu
@@ -0,0 +1,95 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+FROM ubuntu:14.04
+
+# Install dependencies
+RUN apt update
+RUN apt install -y ccache flex wget curl build-essential git libffi-dev autoconf pkg-config
+
+ADD scripts/build_zlib.sh /
+RUN /build_zlib.sh
+
+# Install python
+ADD scripts/requirements.txt /
+ADD scripts/build_python.sh /
+ADD scripts/python-tag-abi-tag.py /
+RUN /build_python.sh
+
+# Install cmake manylinux1 package
+ADD scripts/install_cmake.sh /
+RUN /install_cmake.sh
+
+WORKDIR /
+RUN git clone https://github.com/matthew-brett/multibuild.git && cd multibuild && git checkout ffe59955ad8690c2f8bb74766cb7e9b0d0ee3963
+
+ADD scripts/build_virtualenvs.sh /
+RUN /build_virtualenvs.sh
+
+ADD scripts/build_openssl.sh /
+RUN /build_openssl.sh
+
+ADD scripts/build_boost.sh /
+RUN /build_boost.sh
+
+ADD scripts/build_gtest.sh /
+RUN /build_gtest.sh
+ENV GTEST_HOME /usr
+
+ADD scripts/build_flatbuffers.sh /
+RUN /build_flatbuffers.sh
+ENV FLATBUFFERS_HOME /usr
+
+ADD scripts/build_bison.sh /
+RUN /build_bison.sh
+
+ADD scripts/build_thrift.sh /
+RUN /build_thrift.sh
+ENV THRIFT_HOME /usr
+
+ADD scripts/build_brotli.sh /
+RUN /build_brotli.sh
+ENV BROTLI_HOME /usr
+
+ADD scripts/build_snappy.sh /
+RUN /build_snappy.sh
+ENV SNAPPY_HOME /usr
+
+ADD scripts/build_lz4.sh /
+RUN /build_lz4.sh
+ENV LZ4_HOME /usr
+
+ADD scripts/build_zstd.sh /
+RUN /build_zstd.sh
+ENV ZSTD_HOME /usr
+
+ADD scripts/build_ccache.sh /
+RUN /build_ccache.sh
+
+ADD scripts/build_protobuf.sh /
+RUN /build_protobuf.sh
+ENV PROTOBUF_HOME /usr
+
+ADD scripts/build_glog.sh /
+RUN /build_glog.sh
+ENV GLOG_HOME /usr
+
+
+ADD scripts/build_llvm.sh /
+RUN /build_llvm.sh
+
+ADD scripts/build_clang.sh /
+RUN /build_clang.sh
diff --git a/python/manylinux1/README.md b/python/manylinux1/README.md
index de99f1c..06288f4 100644
--- a/python/manylinux1/README.md
+++ b/python/manylinux1/README.md
@@ -31,9 +31,14 @@ for all supported Python versions and place them in the `dist` folder.
### Build instructions
+You can build the wheels with the following
+command (this is for Python 2.7 with unicode width 16, similarly you can pass
+in `PYTHON_VERSION="3.5"`, `PYTHON_VERSION="3.6"` or `PYTHON_VERSION="3.7"` or
+use `PYTHON_VERSION="2.7"` with `UNICODE_WIDTH=32`):
+
```bash
# Build the python packages
-docker run --shm-size=2g --rm -t -i -v $PWD:/io -v $PWD/../../:/arrow quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /io/build_arrow.sh
+docker run --env PYTHON_VERSION="2.7" --env UNICODE_WIDTH=16 --shm-size=2g --rm -t -i -v $PWD:/io -v $PWD/../../:/arrow quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /io/build_arrow.sh
# Now the new packages are located in the dist/ folder
ls -l dist/
```
@@ -53,3 +58,36 @@ build the sources are removed again so that only the binary installation of a
dependency is persisted in the docker image. When you do local adjustments to
this image, you need to change the name of the docker image in the `docker run`
command.
+
+## TensorFlow compatible wheels for Arrow
+
+As TensorFlow is not compatible with the manylinux1 standard, the above
+wheels can cause segfaults if they are used together with the TensorFlow wheels
+from https://www.tensorflow.org/install/pip. We do not recommend using
+TensorFlow wheels with pyarrow manylinux1 wheels until these incompatibilities
+are addressed by the TensorFlow team [1]. For most end-users, the recommended
+way to use Arrow together with TensorFlow is through conda.
+If this is not an option for you, there is also a way to produce TensorFlow
+compatible Arrow wheels that however do not conform to the manylinux1 standard
+and are not officially supported by the Arrow community.
+
+Similar to the manylinux1 wheels, there is a base image that can be built with
+
+```bash
+docker build -t arrow_linux_x86_64_base -f Dockerfile-x86_64_ubuntu .
+```
+
+Once the image has been built, you can then build the wheels with the following
+command (this is for Python 2.7 with unicode width 16, similarly you can pass
+in `PYTHON_VERSION="3.5"`, `PYTHON_VERSION="3.6"` or `PYTHON_VERSION="3.7"` or
+use `PYTHON_VERSION="2.7"` with `UNICODE_WIDTH=32`)
+
+```bash
+# Build the python packages
+sudo docker run --env UBUNTU_WHEELS=1 --env PYTHON_VERSION="2.7" --env UNICODE_WIDTH=16 --rm -t -i -v $PWD:/io -v $PWD/../../:/arrow arrow_linux_x86_64_base:latest /io/build_arrow.sh
+# Now the new packages are located in the dist/ folder
+ls -l dist/
+echo "Please note that these wheels are not manylinux1 compliant"
+```
+
+[1] https://groups.google.com/a/tensorflow.org/d/topic/developers/TMqRaT-H2bI/discussion
diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh
index e0475bb..7738e3d 100755
--- a/python/manylinux1/build_arrow.sh
+++ b/python/manylinux1/build_arrow.sh
@@ -28,6 +28,9 @@ source /multibuild/manylinux_utils.sh
# Quit on failure
set -e
+# Print commands for debugging
+set -x
+
cd /arrow/python
# PyArrow build configuration
@@ -108,15 +111,19 @@ PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py build_ext \
PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py bdist_wheel
PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py sdist
-echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux1 ==="
-mkdir -p repaired_wheels/
-auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/
+if [ -n "$UBUNTU_WHEELS" ]; then
+ echo "=== (${PYTHON_VERSION}) Wheels are not compatible with manylinux1 ==="
+ mv dist/pyarrow-*.whl /io/dist
+else
+ echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux1 ==="
+ mkdir -p repaired_wheels/
+ auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/
-# Install the built wheels
-$PIP install repaired_wheels/*.whl
+ # Install the built wheels
+ $PIP install repaired_wheels/*.whl
-# Test that the modules are importable
-$PYTHON_INTERPRETER -c "
+ # Test that the modules are importable
+ $PYTHON_INTERPRETER -c "
import sys
import pyarrow
import pyarrow.orc
@@ -125,9 +132,10 @@ import pyarrow.plasma
if sys.version_info.major > 2:
import pyarrow.gandiva
-"
+ "
-# More thorough testing happens outsite of the build to prevent
-# packaging issues like ARROW-4372
-mv dist/*.tar.gz /io/dist
-mv repaired_wheels/*.whl /io/dist
+ # More thorough testing happens outsite of the build to prevent
+ # packaging issues like ARROW-4372
+ mv dist/*.tar.gz /io/dist
+ mv repaired_wheels/*.whl /io/dist
+fi
diff --git a/python/manylinux1/scripts/build_python.sh b/python/manylinux1/scripts/build_python.sh
new file mode 100755
index 0000000..d116d43
--- /dev/null
+++ b/python/manylinux1/scripts/build_python.sh
@@ -0,0 +1,218 @@
+#!/bin/bash -e
+# Copyright (c) 2016 manylinux
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# The following is taken from docker/build_scripts/build_env.sh,
+# docker/build_scripts/build_utils.sh and
+# docker/build_scripts/build.sh from the manylinux1 project
+# (https://github.com/pypa/manylinux/).
+
+PYTHON_DOWNLOAD_URL=https://www.python.org/ftp/python
+CPYTHON_VERSIONS="2.7.15 3.4.9 3.5.6 3.6.8 3.7.2"
+
+# openssl version to build, with expected sha256 hash of .tar.gz
+# archive.
+OPENSSL_ROOT=openssl-1.0.2q
+OPENSSL_HASH=5744cfcbcec2b1b48629f7354203bc1e5e9b5466998bbccc5b5fcde3b18eb684
+OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source
+
+# Update to slightly newer, verified Git commit:
+# https://github.com/NixOS/patchelf/commit/2a9cefd7d637d160d12dc7946393778fa8abbc58
+PATCHELF_VERSION=2a9cefd7d637d160d12dc7946393778fa8abbc58
+PATCHELF_HASH=12da4727f09be42ae0b54878e1b8e86d85cb7a5b595731cdc1a0a170c4873c6d
+
+CURL_ROOT=curl-7.61.1
+CURL_HASH=eaa812e9a871ea10dbe8e1d3f8f12a64a8e3e62aeab18cb23742e2f1727458ae
+CURL_DOWNLOAD_URL=https://curl.haxx.se/download
+
+AUTOCONF_ROOT=autoconf-2.69
+AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
+AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf
+AUTOMAKE_ROOT=automake-1.16.1
+AUTOMAKE_HASH=608a97523f97db32f1f5d5615c98ca69326ced2054c9f82e65bade7fc4c9dea8
+AUTOMAKE_DOWNLOAD_URL=http://ftp.gnu.org/gnu/automake
+LIBTOOL_ROOT=libtool-2.4.6
+LIBTOOL_HASH=e3bd4d5d3d025a36c21dd6af7ea818a2afcd4dfc1ea5a17b39d7854bcd0c06e3
+LIBTOOL_DOWNLOAD_URL=http://ftp.gnu.org/gnu/libtool
+
+SQLITE_AUTOCONF_VERSION=sqlite-autoconf-3230100
+SQLITE_AUTOCONF_HASH=92842b283e5e744eff5da29ed3c69391de7368fccc4d0ee6bf62490ce555ef25
+SQLITE_AUTOCONF_DOWNLOAD_URL=https://www.sqlite.org/2018
+
+GIT_ROOT=2.19.1
+GIT_HASH=ba2fed9d02e424b735e035c4f2b0bdb168ef0df7e35156b5051d900dc7247787
+GIT_DOWNLOAD_URL=https://github.com/git/git/archive
+
+GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py
+EPEL_RPM_HASH=0dcc89f9bf67a2a515bad64569b7a9615edc5e018f676a578d5fd0f17d3c81d4
+DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
+
+function check_var {
+ if [ -z "$1" ]; then
+ echo "required variable not defined"
+ exit 1
+ fi
+}
+
+function lex_pyver {
+ # Echoes Python version string padded with zeros
+ # Thus:
+ # 3.2.1 -> 003002001
+ # 3 -> 003000000
+ echo $1 | awk -F "." '{printf "%03d%03d%03d", $1, $2, $3}'
+}
+
+function pyver_dist_dir {
+ # Echoes the dist directory name of given pyver, removing alpha/beta prerelease
+ # Thus:
+ # 3.2.1 -> 3.2.1
+ # 3.7.0b4 -> 3.7.0
+ echo $1 | awk -F "." '{printf "%d.%d.%d", $1, $2, $3}'
+}
+
+function do_cpython_build {
+ local py_ver=$1
+ check_var $py_ver
+ local ucs_setting=$2
+ check_var $ucs_setting
+ tar -xzf Python-$py_ver.tgz
+ pushd Python-$py_ver
+ if [ "$ucs_setting" = "none" ]; then
+ unicode_flags=""
+ dir_suffix=""
+ else
+ local unicode_flags="--enable-unicode=$ucs_setting"
+ local dir_suffix="-$ucs_setting"
+ fi
+ local prefix="/opt/_internal/cpython-${py_ver}${dir_suffix}"
+ mkdir -p ${prefix}/lib
+ ./configure --prefix=${prefix} --disable-shared $unicode_flags > /dev/null
+ make -j2 > /dev/null
+ make install > /dev/null
+ popd
+ rm -rf Python-$py_ver
+ # Some python's install as bin/python3. Make them available as
+ # bin/python.
+ if [ -e ${prefix}/bin/python3 ]; then
+ ln -s python3 ${prefix}/bin/python
+ fi
+ # --force-reinstall is to work around:
+ # https://github.com/pypa/pip/issues/5220
+ # https://github.com/pypa/get-pip/issues/19
+ ${prefix}/bin/python get-pip.py --force-reinstall
+ if [ -e ${prefix}/bin/pip3 ] && [ ! -e ${prefix}/bin/pip ]; then
+ ln -s pip3 ${prefix}/bin/pip
+ fi
+ # Since we fall back on a canned copy of get-pip.py, we might not have
+ # the latest pip and friends. Upgrade them to make sure.
+ ${prefix}/bin/pip install -U --require-hashes -r ${MY_DIR}/requirements.txt
+ local abi_tag=$(${prefix}/bin/python ${MY_DIR}/python-tag-abi-tag.py)
+ ln -s ${prefix} /opt/python/${abi_tag}
+}
+
+
+function build_cpython {
+ local py_ver=$1
+ check_var $py_ver
+ check_var $PYTHON_DOWNLOAD_URL
+ local py_dist_dir=$(pyver_dist_dir $py_ver)
+ curl -fsSLO $PYTHON_DOWNLOAD_URL/$py_dist_dir/Python-$py_ver.tgz
+ curl -fsSLO $PYTHON_DOWNLOAD_URL/$py_dist_dir/Python-$py_ver.tgz.asc
+ if [ $(lex_pyver $py_ver) -lt $(lex_pyver 3.3) ]; then
+ do_cpython_build $py_ver ucs2
+ do_cpython_build $py_ver ucs4
+ else
+ do_cpython_build $py_ver none
+ fi
+ rm -f Python-$py_ver.tgz
+ rm -f Python-$py_ver.tgz.asc
+}
+
+
+function build_cpythons {
+ check_var $GET_PIP_URL
+ curl -fsSLO $GET_PIP_URL
+ for py_ver in $@; do
+ build_cpython $py_ver
+ done
+ rm -f get-pip.py
+}
+
+function do_openssl_build {
+ ./config no-ssl2 no-shared -fPIC --prefix=/usr/local/ssl > /dev/null
+ make > /dev/null
+ make install_sw > /dev/null
+}
+
+
+function check_required_source {
+ local file=$1
+ check_var ${file}
+ if [ ! -f $file ]; then
+ echo "Required source archive must be prefetched to docker/sources/ with prefetch.sh: $file"
+ return 1
+ fi
+}
+
+
+function fetch_source {
+ # This is called both inside and outside the build context (e.g. in Travis) to prefetch
+ # source tarballs, where curl exists (and works)
+ local file=$1
+ check_var ${file}
+ local url=$2
+ check_var ${url}
+ if [ -f ${file} ]; then
+ echo "${file} exists, skipping fetch"
+ else
+ curl -fsSL -o ${file} ${url}/${file}
+ fi
+}
+
+
+function check_sha256sum {
+ local fname=$1
+ check_var ${fname}
+ local sha256=$2
+ check_var ${sha256}
+
+ echo "${sha256} ${fname}" > ${fname}.sha256
+ sha256sum -c ${fname}.sha256
+ rm -f ${fname}.sha256
+}
+
+
+function build_openssl {
+ local openssl_fname=$1
+ check_var ${openssl_fname}
+ local openssl_sha256=$2
+ check_var ${openssl_sha256}
+ # Can't use curl here because we don't have it yet, OpenSSL must be prefetched
+ fetch_source ${openssl_fname}.tar.gz ${OPENSSL_DOWNLOAD_URL}
+ check_sha256sum ${openssl_fname}.tar.gz ${openssl_sha256}
+ tar -xzf ${openssl_fname}.tar.gz
+ (cd ${openssl_fname} && do_openssl_build)
+ rm -rf ${openssl_fname} ${openssl_fname}.tar.gz
+}
+
+build_openssl $OPENSSL_ROOT $OPENSSL_HASH
+
+mkdir -p /opt/python
+build_cpythons $CPYTHON_VERSIONS
diff --git a/python/manylinux1/scripts/python-tag-abi-tag.py b/python/manylinux1/scripts/python-tag-abi-tag.py
new file mode 100644
index 0000000..212ab54
--- /dev/null
+++ b/python/manylinux1/scripts/python-tag-abi-tag.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2016 manylinux
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# The following is taken from docker/build_scripts/python-tag-abi-tag.py
+# from the manylinux1 project (https://github.com/pypa/manylinux/).
+
+# Utility script to print the python tag + the abi tag for a Python
+# See PEP 425 for exactly what these are, but an example would be:
+# cp27-cp27mu
+
+from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
+
+print("{0}{1}-{2}".format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))
diff --git a/python/manylinux1/scripts/requirements.txt b/python/manylinux1/scripts/requirements.txt
new file mode 100644
index 0000000..38a32df
--- /dev/null
+++ b/python/manylinux1/scripts/requirements.txt
@@ -0,0 +1,34 @@
+# Copyright (c) 2016 manylinux
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# The following is taken from docker/build_scripts/requirements.txt
+# from the manylinux1 project (https://github.com/pypa/manylinux/).
+
+# pip requirements for all cpythons
+# NOTE: pip has GPG signatures; could download and verify independently.
+pip==19.0.3 \
+ --hash=sha256:6e6f197a1abfb45118dbb878b5c859a0edbdd33fd250100bc015b67fded4b9f2 \
+ --hash=sha256:bd812612bbd8ba84159d9ddc0266b7fbce712fc9bc98c82dee5750546ec8ec64
+wheel==0.31.1 \
+ --hash=sha256:80044e51ec5bbf6c894ba0bc48d26a8c20a9ba629f4ca19ea26ecfcf87685f5f \
+ --hash=sha256:0a2e54558a0628f2145d2fc822137e322412115173e8a2ddbe1c9024338ae83c
+setuptools==40.7.3 \
+ --hash=sha256:4f4acaf06d617dccfd3fbbc9fbd83cf4749759a1fa2bdf589206a3278e0d537a \
+ --hash=sha256:702fdd31cb10a65a94beba1a7d89219a58d2587a349e0a1b7827b133e99ca430
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index dabcdf1..e74a9a1 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -44,13 +44,6 @@ except ImportError:
import pyarrow.compat as compat
-# Workaround for https://issues.apache.org/jira/browse/ARROW-2657
-# and https://issues.apache.org/jira/browse/ARROW-2920
-if _sys.platform in ('linux', 'linux2'):
- compat.import_tensorflow_extension()
- compat.import_pytorch_extension()
-
-
from pyarrow.lib import cpu_count, set_cpu_count
from pyarrow.lib import (null, bool_,
int8, int16, int32, int64,
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index ee924ed..e41de73 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -163,112 +163,6 @@ def encode_file_path(path):
# will convert utf8 to utf16
return encoded_path
-def _iterate_python_module_paths(package_name):
- """
- Return an iterator to full paths of a python package.
-
- This is a best effort and might fail.
- It uses the official way of loading modules from
- https://docs.python.org/3/library/importlib.html#approximating-importlib-import-module
- """
- if PY2:
- import imp
- try:
- _, pathname, _ = imp.find_module(package_name)
- except ImportError:
- return
- else:
- yield pathname
- else:
- try:
- import importlib
- absolute_name = importlib.util.resolve_name(package_name, None)
- except (ImportError, AttributeError):
- # Sometimes, importlib is not available (e.g. Python 2)
- # or importlib.util is not available (e.g. Python 2.7)
- spec = None
- else:
- import sys
- for finder in sys.meta_path:
- try:
- spec = finder.find_spec(absolute_name, None)
- except (AttributeError, TypeError):
- # On Travis (Python 3.5) the above produced:
- # AttributeError: 'VendorImporter' object has no
- # attribute 'find_spec'
- #
- # ARROW-4117: When running "asv dev", TypeError is raised
- # due to the meta-importer
- spec = None
-
- if spec is not None:
- break
-
- if spec:
- module = importlib.util.module_from_spec(spec)
- for path in module.__path__:
- yield path
-
-def import_tensorflow_extension():
- """
- Load the TensorFlow extension if it exists.
-
- This is used to load the TensorFlow extension before
- pyarrow.lib. If we don't do this there are symbol clashes
- between TensorFlow's use of threading and our global
- thread pool, see also
- https://issues.apache.org/jira/browse/ARROW-2657 and
- https://github.com/apache/arrow/pull/2096.
- """
- import os
- tensorflow_loaded = False
-
- # Try to load the tensorflow extension directly
- # This is a performance optimization, tensorflow will always be
- # loaded via the "import tensorflow" statement below if this
- # doesn't succeed.
-
- for path in _iterate_python_module_paths("tensorflow"):
- ext = os.path.join(path, "libtensorflow_framework.so")
- if os.path.exists(ext):
- import ctypes
- try:
- ctypes.CDLL(ext)
- except OSError:
- pass
- tensorflow_loaded = True
- break
-
- # If the above failed, try to load tensorflow the normal way
- # (this is more expensive)
-
- if not tensorflow_loaded:
- try:
- import tensorflow
- except ImportError:
- pass
-
-def import_pytorch_extension():
- """
- Load the PyTorch extension if it exists.
-
- This is used to load the PyTorch extension before
- pyarrow.lib. If we don't do this there are symbol clashes
- between PyTorch's use of threading and our global
- thread pool, see also
- https://issues.apache.org/jira/browse/ARROW-2920
- """
- import ctypes
- import os
-
- for path in _iterate_python_module_paths("torch"):
- try:
- ctypes.CDLL(os.path.join(path, "lib/libcaffe2.so"))
- except OSError:
- # lib/libcaffe2.so only exists in pytorch starting from 0.4.0,
- # in older versions of pytorch there are not symbol clashes
- pass
-
integer_types = six.integer_types + (np.integer,)