You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/11/20 14:27:08 UTC

[arrow] branch master updated: ARROW-1778: [Python] Link parquet-cpp statically, privately in manylinux1 wheels

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new cb5da9c  ARROW-1778: [Python] Link parquet-cpp statically, privately in manylinux1 wheels
cb5da9c is described below

commit cb5da9c1bc71fafd76f5cafc60ef237385a36b2b
Author: Uwe L. Korn <uw...@xhochy.com>
AuthorDate: Mon Nov 20 09:27:02 2017 -0500

    ARROW-1778: [Python] Link parquet-cpp statically, privately in manylinux1 wheels
    
    Author: Uwe L. Korn <uw...@xhochy.com>
    
    Closes #1331 from xhochy/ARROW-1778 and squashes the following commits:
    
    65cf4c6a [Uwe L. Korn] ARROW-1778: [Python] Link parquet-cpp statically, privately in manylinux1 wheels
---
 cpp/cmake_modules/FindThrift.cmake  | 102 ++++++++++++++++++++++++++++++++++++
 python/CMakeLists.txt               |  46 +++++++++++-----
 python/manylinux1/Dockerfile-x86_64 |   2 +-
 python/manylinux1/build_arrow.sh    |   6 +--
 python/setup.py                     |   7 ++-
 5 files changed, 145 insertions(+), 18 deletions(-)

diff --git a/cpp/cmake_modules/FindThrift.cmake b/cpp/cmake_modules/FindThrift.cmake
new file mode 100644
index 0000000..25f5082
--- /dev/null
+++ b/cpp/cmake_modules/FindThrift.cmake
@@ -0,0 +1,102 @@
+# Copyright 2012 Cloudera Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# - Find Thrift (a cross platform RPC lib/tool)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  Thrift_HOME - When set, this path is inspected instead of standard library
+#                locations as the root of the Thrift installation.
+#                The CMake variable THRIFT_HOME (-DTHRIFT_HOME=...) overrides it.
+#
+# This module defines
+#  THRIFT_VERSION, version string of Thrift if found
+#  THRIFT_INCLUDE_DIR, where to find THRIFT headers
+#  THRIFT_CONTRIB_DIR, where contrib thrift files (e.g. fb303.thrift) are installed
+#  THRIFT_STATIC_LIB, THRIFT static library
+#  THRIFT_FOUND, If false, do not try to use Thrift
+
+# Choose the search root: the CMake variable THRIFT_HOME wins; Thrift_HOME is the fallback.
+if( NOT "${THRIFT_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "${THRIFT_HOME}" _native_path )  # convert to a CMake-style path
+    list( APPEND _thrift_roots ${_native_path} )
+elseif ( Thrift_HOME )  # consulted only when THRIFT_HOME is empty
+    list( APPEND _thrift_roots ${Thrift_HOME} )
+endif()
+
+message(STATUS "THRIFT_HOME: ${THRIFT_HOME}")  # printed on every configure; not guarded by QUIET
+find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h HINTS  # directory that contains thrift/Thrift.h
+  ${_thrift_roots}
+  NO_DEFAULT_PATH  # only the roots chosen above are searched
+  PATH_SUFFIXES "include"
+)
+
+find_path(THRIFT_CONTRIB_DIR share/fb303/if/fb303.thrift HINTS  # root under which the fb303 IDL lives
+  ${_thrift_roots}
+  NO_DEFAULT_PATH
+)
+
+if (MSVC AND NOT THRIFT_MSVC_STATIC_LIB_SUFFIX)
+  set(THRIFT_MSVC_STATIC_LIB_SUFFIX md)  # default name suffix on MSVC unless the caller preset one
+endif()
+
+find_library(THRIFT_STATIC_LIB NAMES  # name is spelled with the static-library prefix and suffix
+  ${CMAKE_STATIC_LIBRARY_PREFIX}thrift${THRIFT_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}
+  HINTS ${_thrift_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib"  # architecture-specific directory first, then lib
+)
+
+find_program(THRIFT_COMPILER thrift HINTS  # program named "thrift" under <root>/bin
+  ${_thrift_roots}
+  NO_DEFAULT_PATH
+  PATH_SUFFIXES "bin"
+)
+
+set(THRIFT_FOUND FALSE)  # "found" means the static library was located; the compiler is optional
+if (THRIFT_STATIC_LIB)
+  set(THRIFT_FOUND TRUE)
+  if (THRIFT_COMPILER)  # probe the version only when a compiler exists, else a shell error would be captured
+    execute_process(COMMAND "${THRIFT_COMPILER}" -version OUTPUT_VARIABLE THRIFT_VERSION RESULT_VARIABLE THRIFT_RETURN OUTPUT_STRIP_TRAILING_WHITESPACE)
+  endif ()
+endif ()
+
+if (THRIFT_FOUND)
+  if (NOT Thrift_FIND_QUIETLY)
+    message(STATUS "Thrift version: ${THRIFT_VERSION}")
+  endif ()
+else ()
+  # Build the diagnostic unconditionally: REQUIRED must abort even if QUIET was requested.
+  set(THRIFT_ERR_MSG "Thrift compiler/libraries NOT found: ${THRIFT_RETURN}")
+  set(THRIFT_ERR_MSG "${THRIFT_ERR_MSG} (${THRIFT_INCLUDE_DIR}, ${THRIFT_STATIC_LIB}).")
+  if ( _thrift_roots )
+    set(THRIFT_ERR_MSG "${THRIFT_ERR_MSG} Looked in ${_thrift_roots}.")
+  else ()
+    # Every find_* call above uses NO_DEFAULT_PATH, so without a root nothing was searched.
+    set(THRIFT_ERR_MSG "${THRIFT_ERR_MSG} No search root given; set THRIFT_HOME or Thrift_HOME.")
+  endif ()
+  if ( Thrift_FIND_REQUIRED )
+    message(FATAL_ERROR "${THRIFT_ERR_MSG}")
+  elseif (NOT Thrift_FIND_QUIETLY)
+    message(STATUS "${THRIFT_ERR_MSG}")
+  endif ()
+endif ()
+
+mark_as_advanced(  # keep every cache entry created by the find_* calls out of the default cache view
+  THRIFT_STATIC_LIB
+  THRIFT_COMPILER
+  THRIFT_INCLUDE_DIR
+  THRIFT_CONTRIB_DIR
+)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 8c73482..00a4ee6 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -61,6 +61,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   option(PYARROW_BUILD_PARQUET
     "Build the PyArrow Parquet integration"
     OFF)
+  option(PYARROW_PARQUET_USE_SHARED
+    "Rely on parquet shared libraries where relevant"
+    ON)
   option(PYARROW_BUILD_PLASMA
     "Build the PyArrow Plasma integration"
     OFF)
@@ -282,24 +285,41 @@ if (PYARROW_BUILD_PARQUET)
   endif()
   include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
 
-  if (PYARROW_BUNDLE_ARROW_CPP)
-    bundle_arrow_lib(PARQUET_SHARED_LIB
-      ABI_VERSION ${PARQUET_ABI_VERSION}
-      SO_VERSION ${PARQUET_SO_VERSION})
+  if (PYARROW_PARQUET_USE_SHARED)
+    if (PYARROW_BUNDLE_ARROW_CPP)
+      bundle_arrow_lib(PARQUET_SHARED_LIB
+        ABI_VERSION ${PARQUET_ABI_VERSION}
+        SO_VERSION ${PARQUET_SO_VERSION})
+      if (MSVC)
+        bundle_arrow_implib(PARQUET_SHARED_IMP_LIB)
+      endif()
+    endif()
     if (MSVC)
-      bundle_arrow_implib(PARQUET_SHARED_IMP_LIB)
+      ADD_THIRDPARTY_LIB(parquet
+        SHARED_LIB ${PARQUET_SHARED_IMP_LIB})
+    else()
+      ADD_THIRDPARTY_LIB(parquet
+        SHARED_LIB ${PARQUET_SHARED_LIB})
     endif()
-  endif()
-  if (MSVC)
-    ADD_THIRDPARTY_LIB(parquet
-      SHARED_LIB ${PARQUET_SHARED_IMP_LIB})
+    set(LINK_LIBS
+      ${LINK_LIBS}
+      parquet_shared)
   else()
+    find_package(Thrift REQUIRED)  # THRIFT_STATIC_LIB is linked below, so stop at configure time if it is missing
+    set(Boost_USE_STATIC_LIBS ON)  # ask FindBoost for the static archives
+    find_package(Boost COMPONENTS regex REQUIRED)
+    ADD_THIRDPARTY_LIB(boost_regex
+      STATIC_LIB ${Boost_REGEX_LIBRARY_RELEASE})
     ADD_THIRDPARTY_LIB(parquet
-      SHARED_LIB ${PARQUET_SHARED_LIB})
+      STATIC_LIB ${PARQUET_STATIC_LIB})
+    ADD_THIRDPARTY_LIB(thrift
+      STATIC_LIB ${THRIFT_STATIC_LIB})
+    set(LINK_LIBS
+      ${LINK_LIBS}
+      parquet_static
+      thrift_static
+      boost_regex_static)
   endif()
-  set(LINK_LIBS
-    ${LINK_LIBS}
-    parquet_shared)
   set(CYTHON_EXTENSIONS
     ${CYTHON_EXTENSIONS}
     _parquet)
diff --git a/python/manylinux1/Dockerfile-x86_64 b/python/manylinux1/Dockerfile-x86_64
index 69d8d3d..919a32b 100644
--- a/python/manylinux1/Dockerfile-x86_64
+++ b/python/manylinux1/Dockerfile-x86_64
@@ -29,5 +29,5 @@ RUN /check_arrow_visibility.sh
 WORKDIR /
 RUN git clone https://github.com/apache/parquet-cpp.git
 WORKDIR /parquet-cpp
-RUN ARROW_HOME=/arrow-dist cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/arrow-dist -DPARQUET_BUILD_TESTS=OFF -DPARQUET_BOOST_USE_SHARED=OFF -GNinja .
+RUN ARROW_HOME=/arrow-dist cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/arrow-dist -DPARQUET_BUILD_TESTS=OFF -DPARQUET_BUILD_SHARED=OFF -DPARQUET_BUILD_STATIC=ON -DPARQUET_BOOST_USE_SHARED=OFF -GNinja .
 RUN ninja install
diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh
index 074bd00..ced4556 100755
--- a/python/manylinux1/build_arrow.sh
+++ b/python/manylinux1/build_arrow.sh
@@ -40,11 +40,11 @@ cd /arrow/python
 # PyArrow build configuration
 export PYARROW_BUILD_TYPE='release'
 export PYARROW_WITH_PARQUET=1
+export PYARROW_WITH_STATIC_PARQUET=1
 export PYARROW_WITH_PLASMA=1
 export PYARROW_BUNDLE_ARROW_CPP=1
-# Need as otherwise arrow_io is sometimes not linked
-export LDFLAGS="-Wl,--no-as-needed"
 export PKG_CONFIG_PATH=/arrow-dist/lib64/pkgconfig
+export PYARROW_CMAKE_OPTIONS='-DTHRIFT_HOME=/usr'
 # Ensure the target directory exists
 mkdir -p /io/dist
 
@@ -65,7 +65,7 @@ for PYTHON in ${PYTHON_VERSIONS}; do
     # Clear output directory
     rm -rf dist/
     echo "=== (${PYTHON}) Building wheel ==="
-    PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py build_ext --inplace --with-parquet --bundle-arrow-cpp
+    PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py build_ext --inplace --with-parquet --with-static-parquet --bundle-arrow-cpp
     PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py bdist_wheel
 
     echo "=== (${PYTHON}) Test the existence of optional modules ==="
diff --git a/python/setup.py b/python/setup.py
index ccab8fb..2270cf7 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -82,6 +82,7 @@ class build_ext(_build_ext):
     user_options = ([('extra-cmake-args=', None, 'extra arguments for CMake'),
                      ('build-type=', None, 'build type (debug or release)'),
                      ('with-parquet', None, 'build the Parquet extension'),
+                     ('with-static-parquet', None, 'link parquet statically'),
                      ('with-plasma', None, 'build the Plasma extension'),
                      ('bundle-arrow-cpp', None,
                       'bundle the Arrow C++ libraries')] +
@@ -102,6 +103,8 @@ class build_ext(_build_ext):
 
         self.with_parquet = strtobool(
             os.environ.get('PYARROW_WITH_PARQUET', '0'))
+        self.with_static_parquet = strtobool(
+            os.environ.get('PYARROW_WITH_STATIC_PARQUET', '0'))
         self.with_plasma = strtobool(
             os.environ.get('PYARROW_WITH_PLASMA', '0'))
         self.bundle_arrow_cpp = strtobool(
@@ -144,6 +147,8 @@ class build_ext(_build_ext):
 
         if self.with_parquet:
             cmake_options.append('-DPYARROW_BUILD_PARQUET=on')
+        if self.with_static_parquet:
+            cmake_options.append('-DPYARROW_PARQUET_USE_SHARED=off')
 
         if self.with_plasma:
             cmake_options.append('-DPYARROW_BUILD_PLASMA=on')
@@ -225,7 +230,7 @@ class build_ext(_build_ext):
             move_shared_libs(build_prefix, build_lib, "arrow_python")
             if self.with_plasma:
                 move_shared_libs(build_prefix, build_lib, "plasma")
-            if self.with_parquet:
+            if self.with_parquet and not self.with_static_parquet:
                 move_shared_libs(build_prefix, build_lib, "parquet")
 
         print('Bundling includes: ' + pjoin(build_prefix, 'include'))

-- 
To stop receiving notification emails like this one, please contact
commits@arrow.apache.org.