You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/03/26 17:43:19 UTC

[arrow] branch master updated: ARROW-4645: [C++/Packaging] Ship Gandiva with OSX and Windows wheels

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c174f4  ARROW-4645: [C++/Packaging] Ship Gandiva with OSX and Windows wheels
9c174f4 is described below

commit 9c174f44b73bb5c3b9ccf2bca9e0396a2592efae
Author: Krisztián Szűcs <sz...@gmail.com>
AuthorDate: Tue Mar 26 12:43:10 2019 -0500

    ARROW-4645: [C++/Packaging] Ship Gandiva with OSX and Windows wheels
    
    crossbow builds:
    - [kszucs/crossbow/build-448](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=build-448)
    
    Author: Krisztián Szűcs <sz...@gmail.com>
    
    Closes #3832 from kszucs/ARROW-4645 and squashes the following commits:
    
    97d1f1186 <Krisztián Szűcs> hopefully a relative path issue
    0af03a08c <Krisztián Szűcs> single-quote artifacts path
    2196d5b3b <Krisztián Szűcs> appveyor artifact deployment issues
    e49310909 <Krisztián Szűcs> use move instead of mv on appveyor, this fixes the artifact uploading
    f2935eab8 <Krisztián Szűcs> remove centos images because they have mixed unicode width
    394557194 <Krisztián Szűcs> don't import tensorflow
    6d242bd49 <Krisztián Szűcs> make builds less verbose
    0d1f5a28d <Krisztián Szűcs> correct python 27 docker images
    0c6759723 <Krisztián Szűcs> remove llvm after building; remove not existing fedora image
    781ad2eaa <Krisztián Szűcs> add centos test image for python 27 as well
    f8bdc41da <Krisztián Szűcs> test linux wheels on centos images
    536fab586 <Krisztián Szűcs> fix yml syntax for travis
    48ca6682c <Krisztián Szűcs> cleanup
    6acc93b66 <Krisztián Szűcs> remove rdp
    4deee9f20 <Krisztián Szűcs> bundle re2
    877b39e5e <Krisztián Szűcs> enable gandiva
    764f58373 <Krisztián Szűcs> enable parquet
    64d4eaed7 <Krisztián Szűcs> uncomment brotli's ARROW_LINK_LIBS
    1d1a18258 <Krisztián Szűcs> bundle brotli
    15f161cf0 <Krisztián Szűcs> turn off brotli
    ee24aab1e <Krisztián Szűcs> don't initialize SHARED_LIB_NAME
    b4f007e12 <Krisztián Szűcs> remove brotli link libs
    6d045fa3a <Krisztián Szűcs> messages
    fb487a209 <Krisztián Szűcs> try to bundle brotli as well
    907a23dfd <Krisztián Szűcs> pin llvm@7 for osx wheels
    5dd6d7925 <Krisztián Szűcs> turn off gandiva and parquet
    5a928b743 <Krisztián Szűcs> dumpbin again with absolute path
    676f28f9e <Krisztián Szűcs> debub appveyor
    42475bfb1 <Krisztián Szűcs> debug appveyor
    77561297e <Krisztián Szűcs> blockRdp on appveyor
    ef018b844 <Krisztián Szűcs> start remote desktop earlier
    ff3cd46f8 <Krisztián Szűcs> enable appveyor remote desktop
    20b84e246 <Krisztián Szűcs> try dumpbin pyarrow on appveyor
    193898a27 <Krisztián Szűcs> try ldd pyarrow on appveyor
    50604df69 <Krisztián Szűcs> show installed pyarrow files
    a02cada55 <Krisztián Szűcs> don't build cpp tests
    351aef041 <Krisztián Szűcs> verbose pip install
    dc4222c1f <Krisztián Szűcs> enable gandiva for win wheels
    94266f32e <Krisztián Szűcs> try windows build without gandiva
    2392adabd <Krisztián Szűcs> fix env variable names in windows build
    b7579405b <Krisztián Szűcs> missing re2
    d100bd439 <Krisztián Szűcs> missing protobuf
    84e17dab7 <Krisztián Szűcs> install llvmdev on windows
    b6a99a714 <Krisztián Szűcs> proper spelling for boost_root
    31f80d0f4 <Krisztián Szűcs> OSX: verbose thirdparty build; build boost  multiprecision
    330d95ded <Krisztián Szűcs> install llvm via brew
    1d051b8d9 <Krisztián Szűcs> we don't build 2.7 wheels for windows
    ab1851f4d <Krisztián Szűcs> bundle gandiva with the windows wheels
    207ff06af <Krisztián Szűcs> bundle gandiva with osx wheels
---
 dev/tasks/python-wheels/appveyor.yml   | 10 +++---
 dev/tasks/python-wheels/linux-test.sh  |  9 +++---
 dev/tasks/python-wheels/osx-build.sh   | 38 ++++++++++++++++-------
 dev/tasks/python-wheels/travis.osx.yml |  5 ++-
 dev/tasks/python-wheels/win-build.bat  | 57 ++++++++++++++++------------------
 python/CMakeLists.txt                  | 38 ++++++++++++-----------
 6 files changed, 88 insertions(+), 69 deletions(-)

diff --git a/dev/tasks/python-wheels/appveyor.yml b/dev/tasks/python-wheels/appveyor.yml
index be6ad30..817325c 100644
--- a/dev/tasks/python-wheels/appveyor.yml
+++ b/dev/tasks/python-wheels/appveyor.yml
@@ -20,8 +20,8 @@ os: Visual Studio 2015
 environment:
   ARCH: "64"
   GENERATOR: Visual Studio 14 2015 Win64
-  NUMPY: "1.14.5"
-  PYTHON: "{{ python_version }}"
+  NUMPY_VERSION: "1.14.5"
+  PYTHON_VERSION: "{{ python_version }}"
   MSVC_DEFAULT_OPTIONS: ON
   ARROW_SRC: C:\apache-arrow
   PYARROW_VERSION: {{ arrow.no_rc_version }}
@@ -33,15 +33,17 @@ init:
 
 build_script:
   - mkdir wheels
+  - git config core.symlinks true
   - git clone -b {{ arrow.branch }} {{ arrow.remote }} %ARROW_SRC% || exit /B
   - git -C %ARROW_SRC% checkout {{ arrow.head }} || exit /B
   - call %ARROW_SRC%\dev\tasks\python-wheels\win-build.bat
 
 after_build:
-  xcopy %ARROW_SRC%\python\dist\* wheels\
+  # the artifacts must be uploaded from a directory relative to the build root
+  - xcopy %ARROW_SRC%\python\dist\* wheels\
 
 artifacts:
-  - path: wheels\*.whl
+  - path: 'wheels\*.whl'
 
 deploy:
   release: {{ task.tag }}
diff --git a/dev/tasks/python-wheels/linux-test.sh b/dev/tasks/python-wheels/linux-test.sh
index dcd77eb..4abe6f3 100755
--- a/dev/tasks/python-wheels/linux-test.sh
+++ b/dev/tasks/python-wheels/linux-test.sh
@@ -17,8 +17,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
+set -e
+
 # Install built wheel
-pip install /arrow/python/manylinux1/dist/*.whl
+pip install -q /arrow/python/manylinux1/dist/*.whl
 
 # Runs tests on installed distribution from an empty directory
 python --version
@@ -30,12 +32,11 @@ import pyarrow
 import pyarrow.orc
 import pyarrow.parquet
 import pyarrow.plasma
-import tensorflow
 
 if sys.version_info.major > 2:
     import pyarrow.gandiva
 "
 
 # Run pyarrow tests
-pip install -r /arrow/python/requirements-test.txt
-pytest --pyargs pyarrow
+pip install -q -r /arrow/python/requirements-test.txt
+pytest -v --pyargs pyarrow
diff --git a/dev/tasks/python-wheels/osx-build.sh b/dev/tasks/python-wheels/osx-build.sh
index 22c44c1..9fef5b9 100755
--- a/dev/tasks/python-wheels/osx-build.sh
+++ b/dev/tasks/python-wheels/osx-build.sh
@@ -35,10 +35,10 @@ function build_wheel {
 
     pushd $1
 
-    boost_version="1.65.1"
+    boost_version="1.66.0"
     boost_directory_name="boost_${boost_version//\./_}"
     boost_tarball_name="${boost_directory_name}.tar.gz"
-    wget --no-check-certificate \
+    wget -nv --no-check-certificate \
         http://downloads.sourceforge.net/project/boost/boost/"${boost_version}"/"${boost_tarball_name}" \
         -O "${boost_tarball_name}"
     tar xf "${boost_tarball_name}"
@@ -60,7 +60,7 @@ function build_wheel {
     ./b2 tools/bcp > /dev/null 2>&1
     ./dist/bin/bcp --namespace=arrow_boost --namespace-alias \
         filesystem date_time system regex build algorithm locale format \
-        "$arrow_boost" > /dev/null 2>&1
+        multiprecision/cpp_int "$arrow_boost" > /dev/null 2>&1
     popd
 
     # Now build our custom namespaced Boost version.
@@ -102,11 +102,21 @@ function build_wheel {
 
     pip install $(pip_opts) -r python/requirements-wheel.txt cython
 
+    if [ ${MB_PYTHON_VERSION} != "2.7" ]; then
+      # Gandiva is not supported on Python 2.7
+      export PYARROW_WITH_GANDIVA=1
+      export BUILD_ARROW_GANDIVA=ON
+    else
+      export PYARROW_WITH_GANDIVA=0
+      export BUILD_ARROW_GANDIVA=OFF
+    fi
+
     pushd cpp
     mkdir build
     pushd build
     cmake -DCMAKE_BUILD_TYPE=Release \
           -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
+          -DARROW_VERBOSE_THIRDPARTY_BUILD=ON \
           -DARROW_BUILD_TESTS=OFF \
           -DARROW_BUILD_SHARED=ON \
           -DARROW_BOOST_USE_SHARED=ON \
@@ -115,6 +125,7 @@ function build_wheel {
           -DARROW_RPATH_ORIGIN=ON \
           -DARROW_PYTHON=ON \
           -DARROW_PARQUET=ON \
+          -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \
           -DARROW_ORC=ON \
           -DBOOST_ROOT="$arrow_boost_dist" \
           -DBoost_NAMESPACE=arrow_boost \
@@ -132,6 +143,7 @@ function build_wheel {
     unset ARROW_HOME
     unset PARQUET_HOME
 
+    export PYARROW_WITH_PLASMA=1
     export PYARROW_WITH_PARQUET=1
     export PYARROW_WITH_ORC=1
     export PYARROW_WITH_JEMALLOC=1
@@ -139,13 +151,11 @@ function build_wheel {
     export PYARROW_BUNDLE_BOOST=1
     export PYARROW_BUNDLE_ARROW_CPP=1
     export PYARROW_BUILD_TYPE='release'
+    export PYARROW_BOOST_NAMESPACE='arrow_boost'
     export PYARROW_CMAKE_OPTIONS="-DBOOST_ROOT=$arrow_boost_dist"
     export SETUPTOOLS_SCM_PRETEND_VERSION=$PYARROW_VERSION
     pushd python
-    python setup.py build_ext \
-           --with-plasma --with-orc --with-parquet \
-           --bundle-arrow-cpp --bundle-boost --boost-namespace=arrow_boost \
-           bdist_wheel
+    python setup.py build_ext bdist_wheel
     ls -l dist/
     popd
 
@@ -168,10 +178,16 @@ function install_run {
     python --version
 
     # Test optional dependencies
-    python -c "import pyarrow"
-    python -c "import pyarrow.orc"
-    python -c "import pyarrow.parquet"
-    python -c "import pyarrow.plasma"
+    python -c "
+import sys
+import pyarrow
+import pyarrow.orc
+import pyarrow.parquet
+import pyarrow.plasma
+
+if sys.version_info.major > 2:
+    import pyarrow.gandiva
+"
 
     # Run pyarrow tests
     pip install $(pip_opts) -r python/requirements-test.txt
diff --git a/dev/tasks/python-wheels/travis.osx.yml b/dev/tasks/python-wheels/travis.osx.yml
index a988413..67ded34 100644
--- a/dev/tasks/python-wheels/travis.osx.yml
+++ b/dev/tasks/python-wheels/travis.osx.yml
@@ -41,7 +41,7 @@ before_install:
   - brew uninstall boost cgal postgis sfcgal
   - brew update
   - brew upgrade cmake
-  - brew install bison flex
+  - brew install bison flex llvm@7
 
   - export CONFIG_PATH=`pwd`/arrow/dev/tasks/python-wheels/osx-build.sh
   - source multibuild/common_utils.sh
@@ -57,6 +57,9 @@ install:
 
   # the following functions are defined in osx-build.sh
   - build_wheel arrow
+
+  # remove llvm before testing the built wheels to ensure LLVM was linked statically
+  - brew uninstall llvm@7
   - install_run arrow
 
   # move built wheels to a top level directory
diff --git a/dev/tasks/python-wheels/win-build.bat b/dev/tasks/python-wheels/win-build.bat
index 02cd57d..d398855 100644
--- a/dev/tasks/python-wheels/win-build.bat
+++ b/dev/tasks/python-wheels/win-build.bat
@@ -17,31 +17,23 @@
 
 @echo on
 
+@rem create conda environment for compiling
 conda update --yes --quiet conda
 
-conda create -n arrow -q -y python=%PYTHON% ^
-      six pytest setuptools numpy=%NUMPY% pandas
+conda create -n arrow -q -y python=%PYTHON_VERSION% ^
+      six pytest setuptools numpy=%NUMPY_VERSION% pandas
 
 conda install -n arrow -q -y -c conda-forge ^
       git flatbuffers rapidjson ^
       cmake ^
       boost-cpp thrift-cpp ^
-      gflags snappy zlib brotli zstd lz4-c double-conversion
+      gflags snappy zlib zstd lz4-c double-conversion ^
+      llvmdev libprotobuf
 
 call activate arrow
 
-pushd %ARROW_SRC%
-
-@rem fix up symlinks
-git config core.symlinks true
-git reset --hard || exit /B
-git checkout "%PYARROW_REF%" || exit /B
-
-popd
-
 set ARROW_HOME=%CONDA_PREFIX%\Library
 set PARQUET_HOME=%CONDA_PREFIX%\Library
-
 echo %ARROW_HOME%
 
 @rem Build and test Arrow C++ libraries
@@ -53,41 +45,44 @@ cmake -G "%GENERATOR%" ^
       -DARROW_BOOST_USE_SHARED=OFF ^
       -DARROW_BUILD_TESTS=OFF ^
       -DCMAKE_BUILD_TYPE=Release ^
+      -DBrotli_SOURCE=BUNDLED ^
+      -DRE2_SOURCE=BUNDLED ^
       -DARROW_CXXFLAGS="/MP" ^
       -DARROW_PYTHON=ON ^
       -DARROW_PARQUET=ON ^
+      -DARROW_GANDIVA=ON ^
       ..  || exit /B
 cmake --build . --target INSTALL --config Release  || exit /B
-
-@rem Needed so python-test.exe works
-set PYTHONPATH=%CONDA_PREFIX%\Lib;%CONDA_PREFIX%\Lib\site-packages;%CONDA_PREFIX%\python35.zip;%CONDA_PREFIX%\DLLs;%CONDA_PREFIX%
-ctest -VV  || exit /B
 popd
 
-@rem Build and import pyarrow
-set PYTHONPATH=
-
 pushd %ARROW_SRC%\python
 set PYARROW_BUILD_TYPE=Release
+@rem Gandiva is not supported on Python 2.7, but we don't build 2.7 wheels for Windows
+set PYARROW_WITH_GANDIVA=1
+set PYARROW_WITH_PARQUET=1
+set PYARROW_WITH_STATIC_BOOST=1
+set PYARROW_BUNDLE_ARROW_CPP=1
 set SETUPTOOLS_SCM_PRETEND_VERSION=%PYARROW_VERSION%
 
 @rem Newer Cython versions are not available on conda-forge
 pip install -U pip
 pip install "Cython>=0.29"
 
-python setup.py build_ext ^
-       --with-parquet ^
-       --with-static-boost ^
-       --bundle-arrow-cpp ^
-       bdist_wheel  || exit /B
+python setup.py build_ext bdist_wheel || exit /B
 popd
 
-@rem test the wheel
 call deactivate
-conda create -n wheel-test -q -y python=%PYTHON% ^
-      numpy=%NUMPY% pandas pytest hypothesis
+
+@rem test the wheel
+conda create -n wheel-test -q -y python=%PYTHON_VERSION% ^
+      numpy=%NUMPY_VERSION% pandas pytest hypothesis
 call activate wheel-test
 
-pip install --no-index --find-links=%ARROW_SRC%\python\dist\ pyarrow
-python -c "import pyarrow; import pyarrow.parquet"
-pytest --pyargs pyarrow
+@rem install the built wheel
+pip install -vv --no-index --find-links=%ARROW_SRC%\python\dist\ pyarrow
+
+@rem test the imports
+python -c "import pyarrow; import pyarrow.parquet; import pyarrow.gandiva;" || exit /B
+
+@rem run the python tests
+pytest --pyargs pyarrow || exit /B
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index b195210..d1f340e 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -268,30 +268,33 @@ function(bundle_boost_lib library_path)
   endif()
 endfunction()
 
-function(bundle_zlib)
-  # We can assume that manylinux1 and macosx have system zlib.
-  # See https://mail.python.org/mm3/archives/list/distutils-sig@python.org/thread/ZZG6GL3XTBLBJXSITYHEXMFKN43EREB7/
-  # for manylinux1.
+# We can assume that manylinux1 and macosx have system zlib.
+# See https://mail.python.org/mm3/archives/list/distutils-sig@python.org/thread/ZZG6GL3XTBLBJXSITYHEXMFKN43EREB7/
+# for manylinux1.
+# zlib uses zlib.dll for Windows
+function(bundle_arrow_dependency library_name)
   if(MSVC)
-    # zlib uses zlib.dll for Windows
-    set(ZLIB_SHARED_LIB_NAME zlib.dll)
+    set(SHARED_LIB_NAME "${library_name}.dll")
     if(DEFINED ENV{CONDA_PREFIX})
-      set(ZLIB_HOME "$ENV{CONDA_PREFIX}\\Library")
+      set(SHARED_LIB_HOME "$ENV{CONDA_PREFIX}\\Library")
     endif()
-    if(DEFINED ENV{ZLIB_HOME})
-      set(ZLIB_HOME "$ENV{ZLIB_HOME}")
+    if(DEFINED ENV{${library_name}_HOME})
+      set(SHARED_LIB_HOME "$ENV{${library_name}_HOME}")
     endif()
-    if("${ZLIB_HOME}" STREQUAL "")
-      find_library(ZLIB_SHARED_LIB NAMES ${ZLIB_SHARED_LIB_NAME})
+    if("${SHARED_LIB_HOME}" STREQUAL "")
+      find_library(SHARED_LIB_PATH NAMES ${SHARED_LIB_NAME})
     else()
-      find_library(ZLIB_SHARED_LIB
-                   NAMES ${ZLIB_SHARED_LIB_NAME}
-                   PATHS ${ZLIB_HOME}
+      find_library(SHARED_LIB_PATH
+                   NAMES ${SHARED_LIB_NAME}
+                   PATHS ${SHARED_LIB_HOME}
                    NO_DEFAULT_PATH
                    PATH_SUFFIXES "bin")
     endif()
-    if(ZLIB_SHARED_LIB)
-      file(COPY ${ZLIB_SHARED_LIB} DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY})
+    if(SHARED_LIB_PATH)
+      message(STATUS "Bundle dependency ${library_name}: ${SHARED_LIB_PATH}")
+      file(COPY ${SHARED_LIB_PATH} DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY})
+    else()
+      message(FATAL_ERROR "Unable to bundle dependency: ${library_name}")
     endif()
   endif()
 endfunction()
@@ -346,11 +349,10 @@ if(PYARROW_BUNDLE_ARROW_CPP)
     bundle_boost_lib(Boost_SYSTEM_LIBRARY)
   endif()
 
-  bundle_zlib()
-
   if(MSVC)
     bundle_arrow_implib(ARROW_SHARED_IMP_LIB)
     bundle_arrow_implib(ARROW_PYTHON_SHARED_IMP_LIB)
+    bundle_arrow_dependency(zlib)
   endif()
 endif()