You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/11/03 17:22:31 UTC

arrow git commit: ARROW-323: [Python] Opt-in to pyarrow.parquet extension rather than attempting and failing silently

Repository: arrow
Updated Branches:
  refs/heads/master 17c9ae7c4 -> 25e010607


ARROW-323: [Python] Opt-in to pyarrow.parquet extension rather than attempting and failing silently

Added a couple of ways to do this: either via the `--with-parquet` command line option (preferred) or by passing an option through to CMake.

Author: Wes McKinney <we...@twosigma.com>

Closes #194 from wesm/ARROW-323 and squashes the following commits:

07c05cc [Wes McKinney] Update readme to illustrate proper use of --with-parquet with build_ext
3bd9a8d [Wes McKinney] Add --with-parquet option to setup.py
374e254 [Wes McKinney] Add to README about building the parquet extension
cab55cb [Wes McKinney] Opt in to building the pyarrow.parquet extension, do not silently fail


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/25e01060
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/25e01060
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/25e01060

Branch: refs/heads/master
Commit: 25e010607542aa7330bd881e145180fe606776c5
Parents: 17c9ae7
Author: Wes McKinney <we...@twosigma.com>
Authored: Thu Nov 3 13:22:19 2016 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Thu Nov 3 13:22:19 2016 -0400

----------------------------------------------------------------------
 python/CMakeLists.txt |  8 +++++++-
 python/README.md      | 20 +++++++++++++++++++-
 python/setup.py       | 38 ++++++++++++++++++++++++--------------
 3 files changed, 50 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/25e01060/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 179f02f..6ad55f8 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -50,6 +50,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   option(PYARROW_BUILD_TESTS
     "Build the PyArrow C++ googletest unit tests"
     OFF)
+  option(PYARROW_BUILD_PARQUET
+    "Build the PyArrow Parquet integration"
+    OFF)
 endif()
 
 find_program(CCACHE_FOUND ccache)
@@ -445,7 +448,10 @@ set(LINK_LIBS
   arrow_ipc
 )
 
-if(PARQUET_FOUND AND PARQUET_ARROW_FOUND)
+if (PYARROW_BUILD_PARQUET)
+  if(NOT (PARQUET_FOUND AND PARQUET_ARROW_FOUND))
+    message(FATAL_ERROR "Unable to locate Parquet libraries")
+  endif()
   ADD_THIRDPARTY_LIB(parquet_arrow
     SHARED_LIB ${PARQUET_ARROW_SHARED_LIB})
   set(LINK_LIBS

http://git-wip-us.apache.org/repos/asf/arrow/blob/25e01060/python/README.md
----------------------------------------------------------------------
diff --git a/python/README.md b/python/README.md
index 2a3e1ba..4fce0d2 100644
--- a/python/README.md
+++ b/python/README.md
@@ -48,7 +48,8 @@ python setup.py build_ext --inplace
 py.test pyarrow
 ```
 
-To change the build type, use the `--build-type` option:
+To change the build type, use the `--build-type` option or set
+`$PYARROW_BUILD_TYPE`:
 
 ```bash
 python setup.py build_ext --build-type=release --inplace
@@ -57,9 +58,26 @@ python setup.py build_ext --build-type=release --inplace
 To pass through other build options to CMake, set the environment variable
 `$PYARROW_CMAKE_OPTIONS`.
 
+#### Build the pyarrow Parquet file extension
+
+To build the integration with [parquet-cpp][1], pass `--with-parquet` to
+the `build_ext` command in setup.py:
+
+```
+python setup.py build_ext --with-parquet install
+```
+
+Alternatively, add `-DPYARROW_BUILD_PARQUET=on` to the general CMake options:
+
+```
+export PYARROW_CMAKE_OPTIONS=-DPYARROW_BUILD_PARQUET=on
+```
+
 #### Build the documentation
 
 ```bash
 pip install -r doc/requirements.txt
 python setup.py build_sphinx
 ```
+
+[1]: https://github.com/apache/parquet-cpp
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/arrow/blob/25e01060/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index b3012e6..341cc64 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -97,13 +97,15 @@ class build_ext(_build_ext):
 
     description = "Build the C-extensions for arrow"
     user_options = ([('extra-cmake-args=', None, 'extra arguments for CMake'),
-                     ('build-type=', None, 'build type (debug or release)')]
-                    + _build_ext.user_options)
+                     ('build-type=', None, 'build type (debug or release)'),
+                     ('with-parquet', None, 'build the Parquet extension')] +
+                    _build_ext.user_options)
 
     def initialize_options(self):
         _build_ext.initialize_options(self)
         self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '')
         self.build_type = os.environ.get('PYARROW_BUILD_TYPE', 'debug').lower()
+        self.with_parquet = False
 
     CYTHON_MODULE_NAMES = [
         'array',
@@ -116,8 +118,6 @@ class build_ext(_build_ext):
         'schema',
         'table']
 
-    CYTHON_ALLOWED_FAILURES = ['parquet']
-
     def _run_cmake(self):
         # The directory containing this setup.py
         source = osp.dirname(osp.abspath(__file__))
@@ -141,17 +141,24 @@ class build_ext(_build_ext):
             if (cachedir != build_temp):
                 return
 
-        pyexe_option = '-DPYTHON_EXECUTABLE=%s' % sys.executable
         static_lib_option = ''
         build_tests_option = ''
 
-        build_type_option = '-DCMAKE_BUILD_TYPE={0}'.format(self.build_type)
+        cmake_options = [
+            '-DPYTHON_EXECUTABLE=%s' % sys.executable,
+            static_lib_option,
+            build_tests_option,
+        ]
+
+        if self.with_parquet:
+            cmake_options.append('-DPYARROW_BUILD_PARQUET=on')
 
         if sys.platform != 'win32':
-            cmake_command = ['cmake', self.extra_cmake_args, pyexe_option,
-                             build_tests_option,
-                             build_type_option,
-                             static_lib_option, source]
+            cmake_options.append('-DCMAKE_BUILD_TYPE={0}'
+                                 .format(self.build_type))
+
+            cmake_command = (['cmake', self.extra_cmake_args] +
+                             cmake_options + [source])
 
             self.spawn(cmake_command)
             args = ['make', 'VERBOSE=1']
@@ -166,10 +173,8 @@ class build_ext(_build_ext):
             # Generate the build files
             extra_cmake_args = shlex.split(self.extra_cmake_args)
             cmake_command = (['cmake'] + extra_cmake_args +
+                             cmake_options +
                              [source,
-                              pyexe_option,
-                              static_lib_option,
-                              build_tests_option,
                              '-G', cmake_generator])
             if "-G" in self.extra_cmake_args:
                 cmake_command = cmake_command[:-2]
@@ -202,7 +207,7 @@ class build_ext(_build_ext):
             built_path = self.get_ext_built(name)
             if not os.path.exists(built_path):
                 print(built_path)
-                if name in self.CYTHON_ALLOWED_FAILURES:
+                if self._failure_permitted(name):
                     print('Cython module {0} failure permitted'.format(name))
                     continue
                 raise RuntimeError('libpyarrow C-extension failed to build:',
@@ -219,6 +224,11 @@ class build_ext(_build_ext):
 
         os.chdir(saved_cwd)
 
+    def _failure_permitted(self, name):
+        if name == 'parquet' and not self.with_parquet:
+            return True
+        return False
+
     def _get_inplace_dir(self):
         pass