You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ro...@apache.org on 2019/02/28 23:17:55 UTC

[arrow] branch master updated: ARROW-4690: Building TensorFlow compatible wheels for Arrow

This is an automated email from the ASF dual-hosted git repository.

robertnishihara pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new b766bff  ARROW-4690: Building TensorFlow compatible wheels for Arrow
b766bff is described below

commit b766bff34b7d85034d26cebef5b3aeef1eb2fd82
Author: Philipp Moritz <pc...@gmail.com>
AuthorDate: Thu Feb 28 15:17:42 2019 -0800

    ARROW-4690: Building TensorFlow compatible wheels for Arrow
    
    This includes a Dockerfile that can be used to create wheels based on ubuntu 14.04 which are compatible with TensorFlow.
    
    TODO before this can be merged:
    - [x] write documentation how to build this
    - [x] do more testing
    
    Author: Philipp Moritz <pc...@gmail.com>
    
    Closes #3766 from pcmoritz/ubuntu-wheels and squashes the following commits:
    
    f708c29b <Philipp Moritz> remove tensorflow import check
    599ce2e7 <Philipp Moritz> fix manylinux1 build instructions
    f1fbedf8 <Philipp Moritz> remove tensorflow hacks
    bf47f579 <Philipp Moritz> improve wording
    4fb1d38b <Philipp Moritz> add documentation
    078be98b <Philipp Moritz> add licenses
    0ab0bccb <Philipp Moritz> cleanup
    c7ab1395 <Philipp Moritz> fix
    eae775d5 <Philipp Moritz> update
    2820363e <Philipp Moritz> update
    ed683309 <Philipp Moritz> update
    e8c96ecf <Philipp Moritz> update
    8a3b19e8 <Philipp Moritz> update
    0fcc3730 <Philipp Moritz> update
    fd387797 <Philipp Moritz> update
    78dcf42d <Philipp Moritz> update
    7726bb6a <Philipp Moritz> update
    82ae4828 <Philipp Moritz> update
    f44082ea <Philipp Moritz> update
    deb30bfd <Philipp Moritz> update
    50e40320 <Philipp Moritz> update
    58f6c121 <Philipp Moritz> update
    5e8ca589 <Philipp Moritz> update
    5fa73dd5 <Philipp Moritz> update
    595d0fe1 <Philipp Moritz> update
    79006722 <Philipp Moritz> add libffi-dev
    9ff5236d <Philipp Moritz> update
    ca972ad0 <Philipp Moritz> update
    60805e22 <Philipp Moritz> update
    7a66ba35 <Philipp Moritz> update
    1b56d1f1 <Philipp Moritz> zlib
    eedef794 <Philipp Moritz> update
    3ae2b5ab <Philipp Moritz> update
    df297e1c <Philipp Moritz> add python build script
    358e4f85 <Philipp Moritz> update
    65afcebe <Philipp Moritz> update
    11ccfc7e <Philipp Moritz> update
    f1784245 <Philipp Moritz> update
    b3039c8b <Philipp Moritz> update
    9064c3ca <Philipp Moritz> update
    c39f92a9 <Philipp Moritz> install tensorflow
    ec4e2210 <Philipp Moritz> unicode
    773ca2b6 <Philipp Moritz> link python
    b690d64a <Philipp Moritz> update
    5ce7f0d6 <Philipp Moritz> update
    a9302fce <Philipp Moritz> install python-dev
    f12e0cfe <Philipp Moritz> multibuild python 2.7
    9342006b <Philipp Moritz> add git
    ab2ef8e7 <Philipp Moritz> fix cmake install
    cef997b5 <Philipp Moritz> install cmake and ninja
    5d560faf <Philipp Moritz> add build-essential
    adf2f705 <Philipp Moritz> add curl
    f8d66963 <Philipp Moritz> remove xz
    e439356e <Philipp Moritz> apt update
    79fe557e <Philipp Moritz> add docker image for ubuntu wheel
---
 LICENSE.txt                                     |  11 ++
 ci/travis_script_manylinux.sh                   |   1 -
 cpp/cmake_modules/FindFlatbuffers.cmake         |   2 +-
 python/manylinux1/Dockerfile-x86_64_ubuntu      |  95 +++++++++++
 python/manylinux1/README.md                     |  40 ++++-
 python/manylinux1/build_arrow.sh                |  32 ++--
 python/manylinux1/scripts/build_python.sh       | 218 ++++++++++++++++++++++++
 python/manylinux1/scripts/python-tag-abi-tag.py |  30 ++++
 python/manylinux1/scripts/requirements.txt      |  34 ++++
 python/pyarrow/__init__.py                      |   7 -
 python/pyarrow/compat.py                        | 106 ------------
 11 files changed, 448 insertions(+), 128 deletions(-)

diff --git a/LICENSE.txt b/LICENSE.txt
index 58d6671..582daa4 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -826,3 +826,14 @@ This project includes code from Snappy.
 Copyright: 2009 Google Inc. All rights reserved.
 Homepage: https://github.com/google/snappy
 License: 3-clause BSD
+
+--------------------------------------------------------------------------------
+
+This project includes code from the manylinux project.
+
+* python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py,
+  requirements.txt} are based on code from the manylinux project.
+
+Copyright: 2016 manylinux
+Homepage: https://github.com/pypa/manylinux
+License: The MIT License (MIT)
diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh
index fa02c82..8f2bd88 100755
--- a/ci/travis_script_manylinux.sh
+++ b/ci/travis_script_manylinux.sh
@@ -35,7 +35,6 @@ import pyarrow
 import pyarrow.orc
 import pyarrow.parquet
 import pyarrow.plasma
-import tensorflow
 
 if sys.version_info.major > 2:
     import pyarrow.gandiva
diff --git a/cpp/cmake_modules/FindFlatbuffers.cmake b/cpp/cmake_modules/FindFlatbuffers.cmake
index 975c869..f97286f 100644
--- a/cpp/cmake_modules/FindFlatbuffers.cmake
+++ b/cpp/cmake_modules/FindFlatbuffers.cmake
@@ -45,7 +45,7 @@ if ( _flatbuffers_roots )
         PATH_SUFFIXES "include" )
     find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers
         PATHS "${_flatbuffers_roots}" NO_DEFAULT_PATH
-        PATH_SUFFIXES "lib" "lib64")
+        PATH_SUFFIXES "lib" "lib64" "lib/x86_64-linux-gnu/")
 else ()
     find_path( FLATBUFFERS_INCLUDE_DIR NAMES flatbuffers/flatbuffers.h )
     find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers )
diff --git a/python/manylinux1/Dockerfile-x86_64_ubuntu b/python/manylinux1/Dockerfile-x86_64_ubuntu
new file mode 100644
index 0000000..c2b5247
--- /dev/null
+++ b/python/manylinux1/Dockerfile-x86_64_ubuntu
@@ -0,0 +1,95 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+FROM ubuntu:14.04
+
+# Install dependencies
+RUN apt update
+RUN apt install -y ccache flex wget curl build-essential git libffi-dev autoconf pkg-config
+
+ADD scripts/build_zlib.sh /
+RUN /build_zlib.sh
+
+# Install python
+ADD scripts/requirements.txt /
+ADD scripts/build_python.sh /
+ADD scripts/python-tag-abi-tag.py /
+RUN /build_python.sh
+
+# Install cmake manylinux1 package
+ADD scripts/install_cmake.sh /
+RUN /install_cmake.sh
+
+WORKDIR /
+RUN git clone https://github.com/matthew-brett/multibuild.git && cd multibuild && git checkout ffe59955ad8690c2f8bb74766cb7e9b0d0ee3963
+
+ADD scripts/build_virtualenvs.sh /
+RUN /build_virtualenvs.sh
+
+ADD scripts/build_openssl.sh /
+RUN /build_openssl.sh
+
+ADD scripts/build_boost.sh /
+RUN /build_boost.sh
+
+ADD scripts/build_gtest.sh /
+RUN /build_gtest.sh
+ENV GTEST_HOME /usr
+
+ADD scripts/build_flatbuffers.sh /
+RUN /build_flatbuffers.sh
+ENV FLATBUFFERS_HOME /usr
+
+ADD scripts/build_bison.sh /
+RUN /build_bison.sh
+
+ADD scripts/build_thrift.sh /
+RUN /build_thrift.sh
+ENV THRIFT_HOME /usr
+
+ADD scripts/build_brotli.sh /
+RUN /build_brotli.sh
+ENV BROTLI_HOME /usr
+
+ADD scripts/build_snappy.sh /
+RUN /build_snappy.sh
+ENV SNAPPY_HOME /usr
+
+ADD scripts/build_lz4.sh /
+RUN /build_lz4.sh
+ENV LZ4_HOME /usr
+
+ADD scripts/build_zstd.sh /
+RUN /build_zstd.sh
+ENV ZSTD_HOME /usr
+
+ADD scripts/build_ccache.sh /
+RUN /build_ccache.sh
+
+ADD scripts/build_protobuf.sh /
+RUN /build_protobuf.sh
+ENV PROTOBUF_HOME /usr
+
+ADD scripts/build_glog.sh /
+RUN /build_glog.sh
+ENV GLOG_HOME /usr
+
+
+ADD scripts/build_llvm.sh /
+RUN /build_llvm.sh
+
+ADD scripts/build_clang.sh /
+RUN /build_clang.sh
diff --git a/python/manylinux1/README.md b/python/manylinux1/README.md
index de99f1c..06288f4 100644
--- a/python/manylinux1/README.md
+++ b/python/manylinux1/README.md
@@ -31,9 +31,14 @@ for all supported Python versions and place them in the `dist` folder.
 
 ### Build instructions
 
+You can build the wheels with the following
+command (this is for Python 2.7 with unicode width 16, similarly you can pass
+in `PYTHON_VERSION="3.5"`, `PYTHON_VERSION="3.6"` or `PYTHON_VERSION="3.7"` or
+use `PYTHON_VERSION="2.7"` with `UNICODE_WIDTH=32`):
+
 ```bash
 # Build the python packages
-docker run --shm-size=2g --rm -t -i -v $PWD:/io -v $PWD/../../:/arrow quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /io/build_arrow.sh
+docker run --env PYTHON_VERSION="2.7" --env UNICODE_WIDTH=16 --shm-size=2g --rm -t -i -v $PWD:/io -v $PWD/../../:/arrow quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /io/build_arrow.sh
 # Now the new packages are located in the dist/ folder
 ls -l dist/
 ```
@@ -53,3 +58,36 @@ build the sources are removed again so that only the binary installation of a
 dependency is persisted in the docker image. When you do local adjustments to
 this image, you need to change the name of the docker image in the `docker run`
 command.
+
+## TensorFlow compatible wheels for Arrow
+
+As TensorFlow is not compatible with the manylinux1 standard, the above
+wheels can cause segfaults if they are used together with the TensorFlow wheels
+from https://www.tensorflow.org/install/pip. We do not recommend using
+TensorFlow wheels with pyarrow manylinux1 wheels until these incompatibilities
+are addressed by the TensorFlow team [1]. For most end-users, the recommended
+way to use Arrow together with TensorFlow is through conda.
+If this is not an option for you, there is also a way to produce TensorFlow
+compatible Arrow wheels that however do not conform to the manylinux1 standard
+and are not officially supported by the Arrow community.
+
+Similar to the manylinux1 wheels, there is a base image that can be built with
+
+```bash
+docker build -t arrow_linux_x86_64_base -f Dockerfile-x86_64_ubuntu .
+```
+
+Once the image has been built, you can then build the wheels with the following
+command (this is for Python 2.7 with unicode width 16, similarly you can pass
+in `PYTHON_VERSION="3.5"`, `PYTHON_VERSION="3.6"` or `PYTHON_VERSION="3.7"` or
+use `PYTHON_VERSION="2.7"` with `UNICODE_WIDTH=32`)
+
+```bash
+# Build the python packages
+sudo docker run --env UBUNTU_WHEELS=1 --env PYTHON_VERSION="2.7" --env UNICODE_WIDTH=16 --rm -t -i -v $PWD:/io -v $PWD/../../:/arrow arrow_linux_x86_64_base:latest /io/build_arrow.sh
+# Now the new packages are located in the dist/ folder
+ls -l dist/
+echo "Please note that these wheels are not manylinux1 compliant"
+```
+
+[1] https://groups.google.com/a/tensorflow.org/d/topic/developers/TMqRaT-H2bI/discussion
diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh
index e0475bb..7738e3d 100755
--- a/python/manylinux1/build_arrow.sh
+++ b/python/manylinux1/build_arrow.sh
@@ -28,6 +28,9 @@ source /multibuild/manylinux_utils.sh
 # Quit on failure
 set -e
 
+# Print commands for debugging
+set -x
+
 cd /arrow/python
 
 # PyArrow build configuration
@@ -108,15 +111,19 @@ PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py build_ext \
 PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py bdist_wheel
 PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py sdist
 
-echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux1 ==="
-mkdir -p repaired_wheels/
-auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/
+if [ -n "$UBUNTU_WHEELS" ]; then
+  echo "=== (${PYTHON_VERSION}) Wheels are not compatible with manylinux1 ==="
+  mv dist/pyarrow-*.whl /io/dist
+else
+  echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux1 ==="
+  mkdir -p repaired_wheels/
+  auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/
 
-# Install the built wheels
-$PIP install repaired_wheels/*.whl
+  # Install the built wheels
+  $PIP install repaired_wheels/*.whl
 
-# Test that the modules are importable
-$PYTHON_INTERPRETER -c "
+  # Test that the modules are importable
+  $PYTHON_INTERPRETER -c "
 import sys
 import pyarrow
 import pyarrow.orc
@@ -125,9 +132,10 @@ import pyarrow.plasma
 
 if sys.version_info.major > 2:
     import pyarrow.gandiva
-"
+  "
 
-# More thorough testing happens outsite of the build to prevent
-# packaging issues like ARROW-4372
-mv dist/*.tar.gz /io/dist
-mv repaired_wheels/*.whl /io/dist
+  # More thorough testing happens outside of the build to prevent
+  # packaging issues like ARROW-4372
+  mv dist/*.tar.gz /io/dist
+  mv repaired_wheels/*.whl /io/dist
+fi
diff --git a/python/manylinux1/scripts/build_python.sh b/python/manylinux1/scripts/build_python.sh
new file mode 100755
index 0000000..d116d43
--- /dev/null
+++ b/python/manylinux1/scripts/build_python.sh
@@ -0,0 +1,218 @@
+#!/bin/bash -e
+# Copyright (c) 2016 manylinux
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# The following is taken from docker/build_scripts/build_env.sh,
+# docker/build_scripts/build_utils.sh and
+# docker/build_scripts/build.sh from the manylinux1 project
+# (https://github.com/pypa/manylinux/).
+
+PYTHON_DOWNLOAD_URL=https://www.python.org/ftp/python
+CPYTHON_VERSIONS="2.7.15 3.4.9 3.5.6 3.6.8 3.7.2"
+
+# openssl version to build, with expected sha256 hash of .tar.gz
+# archive.
+OPENSSL_ROOT=openssl-1.0.2q
+OPENSSL_HASH=5744cfcbcec2b1b48629f7354203bc1e5e9b5466998bbccc5b5fcde3b18eb684
+OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source
+
+# Update to slightly newer, verified Git commit:
+# https://github.com/NixOS/patchelf/commit/2a9cefd7d637d160d12dc7946393778fa8abbc58
+PATCHELF_VERSION=2a9cefd7d637d160d12dc7946393778fa8abbc58
+PATCHELF_HASH=12da4727f09be42ae0b54878e1b8e86d85cb7a5b595731cdc1a0a170c4873c6d
+
+CURL_ROOT=curl-7.61.1
+CURL_HASH=eaa812e9a871ea10dbe8e1d3f8f12a64a8e3e62aeab18cb23742e2f1727458ae
+CURL_DOWNLOAD_URL=https://curl.haxx.se/download
+
+AUTOCONF_ROOT=autoconf-2.69
+AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
+AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf
+AUTOMAKE_ROOT=automake-1.16.1
+AUTOMAKE_HASH=608a97523f97db32f1f5d5615c98ca69326ced2054c9f82e65bade7fc4c9dea8
+AUTOMAKE_DOWNLOAD_URL=http://ftp.gnu.org/gnu/automake
+LIBTOOL_ROOT=libtool-2.4.6
+LIBTOOL_HASH=e3bd4d5d3d025a36c21dd6af7ea818a2afcd4dfc1ea5a17b39d7854bcd0c06e3
+LIBTOOL_DOWNLOAD_URL=http://ftp.gnu.org/gnu/libtool
+
+SQLITE_AUTOCONF_VERSION=sqlite-autoconf-3230100
+SQLITE_AUTOCONF_HASH=92842b283e5e744eff5da29ed3c69391de7368fccc4d0ee6bf62490ce555ef25
+SQLITE_AUTOCONF_DOWNLOAD_URL=https://www.sqlite.org/2018
+
+GIT_ROOT=2.19.1
+GIT_HASH=ba2fed9d02e424b735e035c4f2b0bdb168ef0df7e35156b5051d900dc7247787
+GIT_DOWNLOAD_URL=https://github.com/git/git/archive
+
+GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py
+EPEL_RPM_HASH=0dcc89f9bf67a2a515bad64569b7a9615edc5e018f676a578d5fd0f17d3c81d4
+DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
+
+function check_var {
+    if [ -z "$1" ]; then
+        echo "required variable not defined"
+        exit 1
+    fi
+}
+
+function lex_pyver {
+    # Echoes Python version string padded with zeros
+    # Thus:
+    # 3.2.1 -> 003002001
+    # 3     -> 003000000
+    echo $1 | awk -F "." '{printf "%03d%03d%03d", $1, $2, $3}'
+}
+
+function pyver_dist_dir {
+    # Echoes the dist directory name of given pyver, removing alpha/beta prerelease
+    # Thus:
+    # 3.2.1   -> 3.2.1
+    # 3.7.0b4 -> 3.7.0
+    echo $1 | awk -F "." '{printf "%d.%d.%d", $1, $2, $3}'
+}
+
+function do_cpython_build {
+    local py_ver=$1
+    check_var $py_ver
+    local ucs_setting=$2
+    check_var $ucs_setting
+    tar -xzf Python-$py_ver.tgz
+    pushd Python-$py_ver
+    if [ "$ucs_setting" = "none" ]; then
+        unicode_flags=""
+        dir_suffix=""
+    else
+        local unicode_flags="--enable-unicode=$ucs_setting"
+        local dir_suffix="-$ucs_setting"
+    fi
+    local prefix="/opt/_internal/cpython-${py_ver}${dir_suffix}"
+    mkdir -p ${prefix}/lib
+    ./configure --prefix=${prefix} --disable-shared $unicode_flags > /dev/null
+    make -j2 > /dev/null
+    make install > /dev/null
+    popd
+    rm -rf Python-$py_ver
+    # Some python's install as bin/python3. Make them available as
+    # bin/python.
+    if [ -e ${prefix}/bin/python3 ]; then
+        ln -s python3 ${prefix}/bin/python
+    fi
+    # --force-reinstall is to work around:
+    #   https://github.com/pypa/pip/issues/5220
+    #   https://github.com/pypa/get-pip/issues/19
+    ${prefix}/bin/python get-pip.py --force-reinstall
+    if [ -e ${prefix}/bin/pip3 ] && [ ! -e ${prefix}/bin/pip ]; then
+        ln -s pip3 ${prefix}/bin/pip
+    fi
+    # Since we fall back on a canned copy of get-pip.py, we might not have
+    # the latest pip and friends. Upgrade them to make sure.
+    ${prefix}/bin/pip install -U --require-hashes -r ${MY_DIR}/requirements.txt
+    local abi_tag=$(${prefix}/bin/python ${MY_DIR}/python-tag-abi-tag.py)
+    ln -s ${prefix} /opt/python/${abi_tag}
+}
+
+
+function build_cpython {
+    local py_ver=$1
+    check_var $py_ver
+    check_var $PYTHON_DOWNLOAD_URL
+    local py_dist_dir=$(pyver_dist_dir $py_ver)
+    curl -fsSLO $PYTHON_DOWNLOAD_URL/$py_dist_dir/Python-$py_ver.tgz
+    curl -fsSLO $PYTHON_DOWNLOAD_URL/$py_dist_dir/Python-$py_ver.tgz.asc
+    if [ $(lex_pyver $py_ver) -lt $(lex_pyver 3.3) ]; then
+        do_cpython_build $py_ver ucs2
+        do_cpython_build $py_ver ucs4
+    else
+        do_cpython_build $py_ver none
+    fi
+    rm -f Python-$py_ver.tgz
+    rm -f Python-$py_ver.tgz.asc
+}
+
+
+function build_cpythons {
+    check_var $GET_PIP_URL
+    curl -fsSLO $GET_PIP_URL
+    for py_ver in $@; do
+        build_cpython $py_ver
+    done
+    rm -f get-pip.py
+}
+
+function do_openssl_build {
+    ./config no-ssl2 no-shared -fPIC --prefix=/usr/local/ssl > /dev/null
+    make > /dev/null
+    make install_sw > /dev/null
+}
+
+
+function check_required_source {
+    local file=$1
+    check_var ${file}
+    if [ ! -f $file ]; then
+        echo "Required source archive must be prefetched to docker/sources/ with prefetch.sh: $file"
+        return 1
+    fi
+}
+
+
+function fetch_source {
+    # This is called both inside and outside the build context (e.g. in Travis) to prefetch
+    # source tarballs, where curl exists (and works)
+    local file=$1
+    check_var ${file}
+    local url=$2
+    check_var ${url}
+    if [ -f ${file} ]; then
+        echo "${file} exists, skipping fetch"
+    else
+        curl -fsSL -o ${file} ${url}/${file}
+    fi
+}
+
+
+function check_sha256sum {
+    local fname=$1
+    check_var ${fname}
+    local sha256=$2
+    check_var ${sha256}
+
+    echo "${sha256}  ${fname}" > ${fname}.sha256
+    sha256sum -c ${fname}.sha256
+    rm -f ${fname}.sha256
+}
+
+
+function build_openssl {
+    local openssl_fname=$1
+    check_var ${openssl_fname}
+    local openssl_sha256=$2
+    check_var ${openssl_sha256}
+    # Can't use curl here because we don't have it yet, OpenSSL must be prefetched
+    fetch_source ${openssl_fname}.tar.gz ${OPENSSL_DOWNLOAD_URL}
+    check_sha256sum ${openssl_fname}.tar.gz ${openssl_sha256}
+    tar -xzf ${openssl_fname}.tar.gz
+    (cd ${openssl_fname} && do_openssl_build)
+    rm -rf ${openssl_fname} ${openssl_fname}.tar.gz
+}
+
+build_openssl $OPENSSL_ROOT $OPENSSL_HASH
+
+mkdir -p /opt/python
+build_cpythons $CPYTHON_VERSIONS
diff --git a/python/manylinux1/scripts/python-tag-abi-tag.py b/python/manylinux1/scripts/python-tag-abi-tag.py
new file mode 100644
index 0000000..212ab54
--- /dev/null
+++ b/python/manylinux1/scripts/python-tag-abi-tag.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2016 manylinux
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# The following is taken from docker/build_scripts/python-tag-abi-tag.py
+# from the manylinux1 project (https://github.com/pypa/manylinux/).
+
+# Utility script to print the python tag + the abi tag for a Python
+# See PEP 425 for exactly what these are, but an example would be:
+#   cp27-cp27mu
+
+from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
+
+print("{0}{1}-{2}".format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))
diff --git a/python/manylinux1/scripts/requirements.txt b/python/manylinux1/scripts/requirements.txt
new file mode 100644
index 0000000..38a32df
--- /dev/null
+++ b/python/manylinux1/scripts/requirements.txt
@@ -0,0 +1,34 @@
+# Copyright (c) 2016 manylinux
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# The following is taken from docker/build_scripts/requirements.txt
+# from the manylinux1 project (https://github.com/pypa/manylinux/).
+
+# pip requirements for all cpythons
+# NOTE: pip has GPG signatures; could download and verify independently.
+pip==19.0.3 \
+    --hash=sha256:6e6f197a1abfb45118dbb878b5c859a0edbdd33fd250100bc015b67fded4b9f2 \
+    --hash=sha256:bd812612bbd8ba84159d9ddc0266b7fbce712fc9bc98c82dee5750546ec8ec64
+wheel==0.31.1 \
+    --hash=sha256:80044e51ec5bbf6c894ba0bc48d26a8c20a9ba629f4ca19ea26ecfcf87685f5f \
+    --hash=sha256:0a2e54558a0628f2145d2fc822137e322412115173e8a2ddbe1c9024338ae83c
+setuptools==40.7.3 \
+    --hash=sha256:4f4acaf06d617dccfd3fbbc9fbd83cf4749759a1fa2bdf589206a3278e0d537a \
+    --hash=sha256:702fdd31cb10a65a94beba1a7d89219a58d2587a349e0a1b7827b133e99ca430
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index dabcdf1..e74a9a1 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -44,13 +44,6 @@ except ImportError:
 
 import pyarrow.compat as compat
 
-# Workaround for https://issues.apache.org/jira/browse/ARROW-2657
-# and https://issues.apache.org/jira/browse/ARROW-2920
-if _sys.platform in ('linux', 'linux2'):
-    compat.import_tensorflow_extension()
-    compat.import_pytorch_extension()
-
-
 from pyarrow.lib import cpu_count, set_cpu_count
 from pyarrow.lib import (null, bool_,
                          int8, int16, int32, int64,
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index ee924ed..e41de73 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -163,112 +163,6 @@ def encode_file_path(path):
     # will convert utf8 to utf16
     return encoded_path
 
-def _iterate_python_module_paths(package_name):
-    """
-    Return an iterator to full paths of a python package.
-
-    This is a best effort and might fail.
-    It uses the official way of loading modules from
-    https://docs.python.org/3/library/importlib.html#approximating-importlib-import-module
-    """
-    if PY2:
-        import imp
-        try:
-            _, pathname, _ = imp.find_module(package_name)
-        except ImportError:
-            return
-        else:
-            yield pathname
-    else:
-        try:
-            import importlib
-            absolute_name = importlib.util.resolve_name(package_name, None)
-        except (ImportError, AttributeError):
-            # Sometimes, importlib is not available (e.g. Python 2)
-            # or importlib.util is not available (e.g. Python 2.7)
-            spec = None
-        else:
-            import sys
-            for finder in sys.meta_path:
-                try:
-                    spec = finder.find_spec(absolute_name, None)
-                except (AttributeError, TypeError):
-                    # On Travis (Python 3.5) the above produced:
-                    # AttributeError: 'VendorImporter' object has no
-                    # attribute 'find_spec'
-                    #
-                    # ARROW-4117: When running "asv dev", TypeError is raised
-                    # due to the meta-importer
-                    spec = None
-
-                if spec is not None:
-                    break
-
-        if spec:
-            module = importlib.util.module_from_spec(spec)
-            for path in module.__path__:
-                yield path
-
-def import_tensorflow_extension():
-    """
-    Load the TensorFlow extension if it exists.
-
-    This is used to load the TensorFlow extension before
-    pyarrow.lib. If we don't do this there are symbol clashes
-    between TensorFlow's use of threading and our global
-    thread pool, see also
-    https://issues.apache.org/jira/browse/ARROW-2657 and
-    https://github.com/apache/arrow/pull/2096.
-    """
-    import os
-    tensorflow_loaded = False
-
-    # Try to load the tensorflow extension directly
-    # This is a performance optimization, tensorflow will always be
-    # loaded via the "import tensorflow" statement below if this
-    # doesn't succeed.
-
-    for path in _iterate_python_module_paths("tensorflow"):
-        ext = os.path.join(path, "libtensorflow_framework.so")
-        if os.path.exists(ext):
-            import ctypes
-            try:
-                ctypes.CDLL(ext)
-            except OSError:
-                pass
-            tensorflow_loaded = True
-            break
-
-    # If the above failed, try to load tensorflow the normal way
-    # (this is more expensive)
-
-    if not tensorflow_loaded:
-        try:
-            import tensorflow
-        except ImportError:
-            pass
-
-def import_pytorch_extension():
-    """
-    Load the PyTorch extension if it exists.
-
-    This is used to load the PyTorch extension before
-    pyarrow.lib. If we don't do this there are symbol clashes
-    between PyTorch's use of threading and our global
-    thread pool, see also
-    https://issues.apache.org/jira/browse/ARROW-2920
-    """
-    import ctypes
-    import os
-
-    for path in _iterate_python_module_paths("torch"):
-        try:
-            ctypes.CDLL(os.path.join(path, "lib/libcaffe2.so"))
-        except OSError:
-            # lib/libcaffe2.so only exists in pytorch starting from 0.4.0,
-            # in older versions of pytorch there are not symbol clashes
-            pass
-
 
 integer_types = six.integer_types + (np.integer,)