You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/26 15:45:47 UTC
[5/5] arrow git commit: ARROW-341: [Python] Move pyarrow's C++ code
to the main C++ source tree, install libarrow_python and headers
ARROW-341: [Python] Move pyarrow's C++ code to the main C++ source tree, install libarrow_python and headers
This will enable third parties to link to `libarrow_python`.
For now, the pyarrow build system continues to use CMake -- for the purpose of resolving the thirdparty toolchain we may or may not want to go completely to distutils, but we can sort that out later.
Author: Wes McKinney <we...@twosigma.com>
Closes #440 from wesm/ARROW-341 and squashes the following commits:
193bc51 [Wes McKinney] Ensure that '-undefined dynamic_lookup' is passed when linking shared library on OS X
a93496b [Wes McKinney] Add missing backslash
7620f50 [Wes McKinney] Fix cpplint issues
0617c69 [Wes McKinney] Fix LD_LIBRARY_PATH, ARROW_HOME
090c78c [Wes McKinney] Build Arrow library stack specific to active Python version
10e4626 [Wes McKinney] Get Python test suite passing again
cfb7f44 [Wes McKinney] Remove print statement
c1e63dc [Wes McKinney] Scrubbing python/CMakeLists.txt
b80b153 [Wes McKinney] Cleanup, build pandas-test within main test suite
7ef1f81 [Wes McKinney] Start moving python/src/pyarrow tp cpp/src/arrow/python
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3aac4ade
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3aac4ade
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3aac4ade
Branch: refs/heads/master
Commit: 3aac4adef11345f211e4c66467ff758cbc397e43
Parents: 6d4e862
Author: Wes McKinney <we...@twosigma.com>
Authored: Sun Mar 26 11:45:38 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sun Mar 26 11:45:38 2017 -0400
----------------------------------------------------------------------
ci/travis_script_python.sh | 26 +-
cpp/CMakeLists.txt | 115 +-
cpp/cmake_modules/BuildUtils.cmake | 88 +-
cpp/cmake_modules/FindNumPy.cmake | 100 ++
cpp/cmake_modules/FindPythonLibsNew.cmake | 241 +++
cpp/src/arrow/python/CMakeLists.txt | 93 +
cpp/src/arrow/python/api.h | 27 +
cpp/src/arrow/python/builtin_convert.cc | 527 ++++++
cpp/src/arrow/python/builtin_convert.h | 54 +
cpp/src/arrow/python/common.cc | 68 +
cpp/src/arrow/python/common.h | 139 ++
cpp/src/arrow/python/config.cc | 35 +
cpp/src/arrow/python/config.h | 45 +
cpp/src/arrow/python/do_import_numpy.h | 21 +
cpp/src/arrow/python/helpers.cc | 55 +
cpp/src/arrow/python/helpers.h | 35 +
cpp/src/arrow/python/io.cc | 222 +++
cpp/src/arrow/python/io.h | 99 ++
cpp/src/arrow/python/numpy_interop.h | 60 +
cpp/src/arrow/python/pandas-test.cc | 64 +
cpp/src/arrow/python/pandas_convert.cc | 1936 +++++++++++++++++++++
cpp/src/arrow/python/pandas_convert.h | 79 +
cpp/src/arrow/python/type_traits.h | 213 +++
cpp/src/arrow/python/util/CMakeLists.txt | 39 +
cpp/src/arrow/python/util/datetime.h | 42 +
cpp/src/arrow/python/util/test_main.cc | 36 +
python/CMakeLists.txt | 215 +--
python/cmake_modules/FindArrow.cmake | 9 +
python/cmake_modules/FindNumPy.cmake | 100 --
python/cmake_modules/FindPythonLibsNew.cmake | 241 ---
python/pyarrow/config.pyx | 14 +-
python/pyarrow/includes/pyarrow.pxd | 6 +-
python/setup.py | 11 +-
python/src/pyarrow/CMakeLists.txt | 22 -
python/src/pyarrow/adapters/builtin.cc | 527 ------
python/src/pyarrow/adapters/builtin.h | 54 -
python/src/pyarrow/adapters/pandas-test.cc | 64 -
python/src/pyarrow/adapters/pandas.cc | 1936 ---------------------
python/src/pyarrow/adapters/pandas.h | 79 -
python/src/pyarrow/api.h | 26 -
python/src/pyarrow/common.cc | 69 -
python/src/pyarrow/common.h | 137 --
python/src/pyarrow/config.cc | 35 -
python/src/pyarrow/config.h | 46 -
python/src/pyarrow/do_import_numpy.h | 21 -
python/src/pyarrow/helpers.cc | 55 -
python/src/pyarrow/helpers.h | 35 -
python/src/pyarrow/io.cc | 221 ---
python/src/pyarrow/io.h | 99 --
python/src/pyarrow/numpy_interop.h | 60 -
python/src/pyarrow/type_traits.h | 212 ---
python/src/pyarrow/util/CMakeLists.txt | 39 -
python/src/pyarrow/util/datetime.h | 42 -
python/src/pyarrow/util/test_main.cc | 36 -
54 files changed, 4409 insertions(+), 4461 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/ci/travis_script_python.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 6f4b8e9..df11209 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -23,7 +23,6 @@ export MINICONDA=$HOME/miniconda
export PATH="$MINICONDA/bin:$PATH"
export ARROW_HOME=$ARROW_CPP_INSTALL
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ARROW_CPP_INSTALL/lib
pushd $PYTHON_DIR
export PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env
@@ -70,11 +69,31 @@ build_parquet_cpp() {
build_parquet_cpp
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PARQUET_HOME/lib
+# Configure, build and install the Arrow C++ libraries with ARROW_PYTHON
+# enabled and the C++ unit tests disabled.
+#
+# $1: build directory to create and run CMake in
+# $2: install prefix, passed to CMake as CMAKE_INSTALL_PREFIX
+function build_arrow_libraries() {
+  CPP_BUILD_DIR=$1
+  CPP_DIR=$TRAVIS_BUILD_DIR/cpp
+
+  # -p: do not fail if the build directory is left over from an earlier run
+  mkdir -p "$CPP_BUILD_DIR"
+  pushd "$CPP_BUILD_DIR"
+
+  cmake -DARROW_BUILD_TESTS=off \
+        -DARROW_PYTHON=on \
+        -DCMAKE_INSTALL_PREFIX="$2" \
+        "$CPP_DIR"
+
+  make -j4
+  make install
+
+  popd
+}
python_version_tests() {
PYTHON_VERSION=$1
CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION
+
+ export ARROW_HOME=$TRAVIS_BUILD_DIR/arrow-install-$PYTHON_VERSION
+ export LD_LIBRARY_PATH=$ARROW_HOME/lib:$PARQUET_HOME/lib
+
conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION
source activate $CONDA_ENV_DIR
@@ -87,6 +106,9 @@ python_version_tests() {
# Expensive dependencies install from Continuum package repo
conda install -y pip numpy pandas cython
+ # Build C++ libraries
+ build_arrow_libraries arrow-build-$PYTHON_VERSION $ARROW_HOME
+
# Other stuff pip install
pip install -r requirements.txt
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c04afe4..c77cf60 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -106,6 +106,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
"Rely on boost shared libraries where relevant"
ON)
+ option(ARROW_PYTHON
+ "Build the Arrow CPython extensions"
+ OFF)
+
option(ARROW_SSE3
"Build Arrow with SSE3"
ON)
@@ -133,6 +137,7 @@ if(NOT ARROW_BUILD_BENCHMARKS)
set(NO_BENCHMARKS 1)
endif()
+include(BuildUtils)
############################################################
# Compiler flags
@@ -303,6 +308,14 @@ endfunction()
#
# Arguments after the test name will be passed to set_tests_properties().
function(ADD_ARROW_TEST REL_TEST_NAME)
+ # Keyword arguments: STATIC_LINK_LIBS <lib>... overrides the default
+ # ARROW_TEST_LINK_LIBS when linking this test executable.
+ set(options)
+ # Must be named one_value_args: that is the variable handed to
+ # cmake_parse_arguments() below.
+ set(one_value_args)
+ set(multi_value_args STATIC_LINK_LIBS)
+ cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+ if(ARG_UNPARSED_ARGUMENTS)
+ message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+ endif()
+
if(NO_TESTS OR NOT ARROW_BUILD_STATIC)
return()
endif()
@@ -312,7 +325,13 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
# This test has a corresponding .cc file, set it up as an executable.
set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}")
add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc")
- target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS})
+
+ if (ARG_STATIC_LINK_LIBS)
+ # Customize link libraries
+ target_link_libraries(${TEST_NAME} ${ARG_STATIC_LINK_LIBS})
+ else()
+ target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS})
+ endif()
add_dependencies(unittest ${TEST_NAME})
else()
# No executable, just invoke the test (probably a script) directly.
@@ -332,10 +351,6 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
endif()
set_tests_properties(${TEST_NAME} PROPERTIES LABELS "unittest")
-
- if(ARGN)
- set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
- endif()
endfunction()
# A wrapper for add_dependencies() that is compatible with NO_TESTS.
@@ -363,72 +378,6 @@ enable_testing()
############################################################
# Dependencies
############################################################
-function(ADD_THIRDPARTY_LIB LIB_NAME)
- set(options)
- set(one_value_args SHARED_LIB STATIC_LIB)
- set(multi_value_args DEPS)
- cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
- if(ARG_UNPARSED_ARGUMENTS)
- message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
- endif()
-
- if(ARG_STATIC_LIB AND ARG_SHARED_LIB)
- if(NOT ARG_STATIC_LIB)
- message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
- endif()
-
- SET(AUG_LIB_NAME "${LIB_NAME}_static")
- add_library(${AUG_LIB_NAME} STATIC IMPORTED)
- set_target_properties(${AUG_LIB_NAME}
- PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
- message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
-
- SET(AUG_LIB_NAME "${LIB_NAME}_shared")
- add_library(${AUG_LIB_NAME} SHARED IMPORTED)
-
- if(MSVC)
- # Mark the \u201d.lib\u201d location as part of a Windows DLL
- set_target_properties(${AUG_LIB_NAME}
- PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
- else()
- set_target_properties(${AUG_LIB_NAME}
- PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
- endif()
- message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
- elseif(ARG_STATIC_LIB)
- add_library(${LIB_NAME} STATIC IMPORTED)
- set_target_properties(${LIB_NAME}
- PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
- SET(AUG_LIB_NAME "${LIB_NAME}_static")
- add_library(${AUG_LIB_NAME} STATIC IMPORTED)
- set_target_properties(${AUG_LIB_NAME}
- PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
- message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
- elseif(ARG_SHARED_LIB)
- add_library(${LIB_NAME} SHARED IMPORTED)
- set_target_properties(${LIB_NAME}
- PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
- SET(AUG_LIB_NAME "${LIB_NAME}_shared")
- add_library(${AUG_LIB_NAME} SHARED IMPORTED)
-
- if(MSVC)
- # Mark the \u201d.lib\u201d location as part of a Windows DLL
- set_target_properties(${AUG_LIB_NAME}
- PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
- else()
- set_target_properties(${AUG_LIB_NAME}
- PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
- endif()
- message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
- else()
- message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
- endif()
-
- if(ARG_DEPS)
- set_target_properties(${LIB_NAME}
- PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
- endif()
-endfunction()
# ----------------------------------------------------------------------
# Add Boost dependencies (code adapted from Apache Kudu (incubating))
@@ -798,8 +747,7 @@ if (${CLANG_FORMAT_FOUND})
add_custom_target(format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 1
`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h |
sed -e '/_generated/g' |
- sed -e '/windows_compatibility.h/g'`
- `find ${CMAKE_CURRENT_SOURCE_DIR}/../python -name \\*.cc -or -name \\*.h`)
+ sed -e '/windows_compatibility.h/g'`)
# runs clang format and exits with a non-zero exit code if any files need to be reformatted
add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 0
@@ -857,11 +805,9 @@ if(NOT APPLE)
set(ARROW_SHARED_LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/arrow/symbols.map")
endif()
-include(BuildUtils)
-
ADD_ARROW_LIB(arrow
- SOURCES ${ARROW_SRCS}
- SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS}
+ SOURCES ${ARROW_SRCS}
+ SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS}
)
add_subdirectory(src/arrow)
@@ -875,6 +821,10 @@ endif()
#----------------------------------------------------------------------
# IPC library
+if(ARROW_PYTHON)
+ set(ARROW_IPC on)
+endif()
+
## Flatbuffers
if(ARROW_IPC)
if("$ENV{FLATBUFFERS_HOME}" STREQUAL "")
@@ -908,3 +858,14 @@ if(ARROW_IPC)
add_subdirectory(src/arrow/ipc)
endif()
+
+if(ARROW_PYTHON)
+ find_package(PythonLibsNew REQUIRED)
+ find_package(NumPy REQUIRED)
+
+ include_directories(SYSTEM
+ ${NUMPY_INCLUDE_DIRS}
+ ${PYTHON_INCLUDE_DIRS})
+
+ add_subdirectory(src/arrow/python)
+endif()
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/cmake_modules/BuildUtils.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake
index 78b514c..c993041 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -15,6 +15,73 @@
# specific language governing permissions and limitations
# under the License.
+# Register a prebuilt third-party library as IMPORTED CMake target(s).
+#
+# Usage:
+#   ADD_THIRDPARTY_LIB(<name>
+#     [STATIC_LIB <path>] [SHARED_LIB <path>] [DEPS <lib>...])
+#
+# Targets created:
+#   STATIC_LIB and SHARED_LIB given: <name>_static and <name>_shared
+#   STATIC_LIB only:                 <name> and <name>_static
+#   SHARED_LIB only:                 <name> and <name>_shared
+#
+# DEPS is stored as IMPORTED_LINK_INTERFACE_LIBRARIES on <name> when that
+# target exists, otherwise on both <name>_static and <name>_shared.
+# Stops with FATAL_ERROR when neither STATIC_LIB nor SHARED_LIB is given.
+function(ADD_THIRDPARTY_LIB LIB_NAME)
+  set(options)
+  set(one_value_args SHARED_LIB STATIC_LIB)
+  set(multi_value_args DEPS)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
+  if(ARG_STATIC_LIB AND ARG_SHARED_LIB)
+    # Both flavours supplied: expose them only under the suffixed names.
+    SET(AUG_LIB_NAME "${LIB_NAME}_static")
+    add_library(${AUG_LIB_NAME} STATIC IMPORTED)
+    set_target_properties(${AUG_LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
+
+    SET(AUG_LIB_NAME "${LIB_NAME}_shared")
+    add_library(${AUG_LIB_NAME} SHARED IMPORTED)
+
+    if(MSVC)
+      # Mark the ".lib" location as part of a Windows DLL
+      set_target_properties(${AUG_LIB_NAME}
+        PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
+    else()
+      set_target_properties(${AUG_LIB_NAME}
+        PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+    endif()
+    message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
+  elseif(ARG_STATIC_LIB)
+    # Static only: the plain name and the _static name point at the same file.
+    add_library(${LIB_NAME} STATIC IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    SET(AUG_LIB_NAME "${LIB_NAME}_static")
+    add_library(${AUG_LIB_NAME} STATIC IMPORTED)
+    set_target_properties(${AUG_LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
+  elseif(ARG_SHARED_LIB)
+    # Shared only: the plain name and the _shared name point at the same file.
+    add_library(${LIB_NAME} SHARED IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+    SET(AUG_LIB_NAME "${LIB_NAME}_shared")
+    add_library(${AUG_LIB_NAME} SHARED IMPORTED)
+
+    if(MSVC)
+      # Mark the ".lib" location as part of a Windows DLL
+      set_target_properties(${AUG_LIB_NAME}
+        PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
+    else()
+      set_target_properties(${AUG_LIB_NAME}
+        PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+    endif()
+    message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
+  else()
+    message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
+  endif()
+
+  if(ARG_DEPS)
+    if(TARGET ${LIB_NAME})
+      set_target_properties(${LIB_NAME}
+        PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
+    else()
+      # When both STATIC_LIB and SHARED_LIB were given, no plain ${LIB_NAME}
+      # target exists; attach the dependencies to the suffixed targets so the
+      # call does not fail on a missing target.
+      set_target_properties(${LIB_NAME}_static ${LIB_NAME}_shared
+        PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
+    endif()
+  endif()
+endfunction()
+
function(ADD_ARROW_LIB LIB_NAME)
set(options)
set(one_value_args SHARED_LINK_FLAGS)
@@ -45,9 +112,16 @@ function(ADD_ARROW_LIB LIB_NAME)
if (ARROW_BUILD_SHARED)
add_library(${LIB_NAME}_shared SHARED $<TARGET_OBJECTS:${LIB_NAME}_objlib>)
+
if(APPLE)
- set_target_properties(${LIB_NAME}_shared PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+ # On OS X, you can avoid linking at library load time and instead
+ # expect that the symbols have been loaded separately. This happens
+ # with libpython* where there can be conflicts between system Python and
+ # the Python from a thirdparty distribution.
+ set(ARG_SHARED_LINK_FLAGS
+ "-undefined dynamic_lookup ${ARG_SHARED_LINK_FLAGS}")
endif()
+
set_target_properties(${LIB_NAME}_shared
PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}"
@@ -55,6 +129,7 @@ function(ADD_ARROW_LIB LIB_NAME)
OUTPUT_NAME ${LIB_NAME}
VERSION "${ARROW_ABI_VERSION}"
SOVERSION "${ARROW_SO_VERSION}")
+
target_link_libraries(${LIB_NAME}_shared
LINK_PUBLIC ${ARG_SHARED_LINK_LIBS}
LINK_PRIVATE ${ARG_SHARED_PRIVATE_LINK_LIBS})
@@ -68,28 +143,28 @@ function(ADD_ARROW_LIB LIB_NAME)
set_target_properties(${LIB_NAME}_shared PROPERTIES
INSTALL_RPATH ${_lib_install_rpath})
endif()
-
+
install(TARGETS ${LIB_NAME}_shared
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
-
+
if (ARROW_BUILD_STATIC)
add_library(${LIB_NAME}_static STATIC $<TARGET_OBJECTS:${LIB_NAME}_objlib>)
set_target_properties(${LIB_NAME}_static
PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}"
OUTPUT_NAME ${LIB_NAME})
-
+
target_link_libraries(${LIB_NAME}_static
LINK_PUBLIC ${ARG_STATIC_LINK_LIBS}
LINK_PRIVATE ${ARG_STATIC_PRIVATE_LINK_LIBS})
-
+
install(TARGETS ${LIB_NAME}_static
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
-
+
if (APPLE)
set_target_properties(${LIB_NAME}_shared
PROPERTIES
@@ -98,4 +173,3 @@ function(ADD_ARROW_LIB LIB_NAME)
endif()
endfunction()
-
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/cmake_modules/FindNumPy.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindNumPy.cmake b/cpp/cmake_modules/FindNumPy.cmake
new file mode 100644
index 0000000..58bb531
--- /dev/null
+++ b/cpp/cmake_modules/FindNumPy.cmake
@@ -0,0 +1,100 @@
+# - Find the NumPy libraries
+# This module finds if NumPy is installed, and sets the following variables
+# indicating where it is.
+#
+# TODO: Update to provide the libraries and paths for linking npymath lib.
+#
+# NUMPY_FOUND - was NumPy found
+# NUMPY_VERSION - the version of NumPy found as a string
+# NUMPY_VERSION_MAJOR - the major version number of NumPy
+# NUMPY_VERSION_MINOR - the minor version number of NumPy
+# NUMPY_VERSION_PATCH - the patch version number of NumPy
+# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
+# NUMPY_INCLUDE_DIRS - path to the NumPy include files
+
+#============================================================================
+# Copyright 2012 Continuum Analytics, Inc.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#============================================================================
+
+# Finding NumPy involves calling the Python interpreter
+if(NumPy_FIND_REQUIRED)
+ find_package(PythonInterp REQUIRED)
+else()
+ find_package(PythonInterp)
+endif()
+
+if(NOT PYTHONINTERP_FOUND)
+ set(NUMPY_FOUND FALSE)
+ return()
+endif()
+
+execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+ "import numpy as n; print(n.__version__); print(n.get_include());"
+ RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS
+ OUTPUT_VARIABLE _NUMPY_VALUES_OUTPUT
+ ERROR_VARIABLE _NUMPY_ERROR_VALUE
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# _NUMPY_SEARCH_SUCCESS is the RESULT_VARIABLE of the execute_process() call
+# above. Compare it numerically: a regex "MATCHES 0" would also accept any
+# value that merely contains a "0" (for example exit code 10).
+if(NOT _NUMPY_SEARCH_SUCCESS EQUAL 0)
+  if(NumPy_FIND_REQUIRED)
+    message(FATAL_ERROR
+      "NumPy import failure:\n${_NUMPY_ERROR_VALUE}")
+  endif()
+  # Optional lookup: report "not found" and stop processing this module.
+  set(NUMPY_FOUND FALSE)
+  return()
+endif()
+
+# Convert the process output into a list
+string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES ${_NUMPY_VALUES_OUTPUT})
+string(REGEX REPLACE "\n" ";" _NUMPY_VALUES ${_NUMPY_VALUES})
+list(GET _NUMPY_VALUES 0 NUMPY_VERSION)
+list(GET _NUMPY_VALUES 1 NUMPY_INCLUDE_DIRS)
+
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" _VER_CHECK "${NUMPY_VERSION}")
+if("${_VER_CHECK}" STREQUAL "")
+ # The output from Python was unexpected. Raise an error always
+ # here, because we found NumPy, but it appears to be corrupted somehow.
+ message(FATAL_ERROR
+ "Requested version and include path from NumPy, got instead:\n${_NUMPY_VALUES_OUTPUT}\n")
+ return()
+endif()
+
+# Make sure all directory separators are '/'
+string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS ${NUMPY_INCLUDE_DIRS})
+
+# Get the major and minor version numbers
+string(REGEX REPLACE "\\." ";" _NUMPY_VERSION_LIST ${NUMPY_VERSION})
+list(GET _NUMPY_VERSION_LIST 0 NUMPY_VERSION_MAJOR)
+list(GET _NUMPY_VERSION_LIST 1 NUMPY_VERSION_MINOR)
+list(GET _NUMPY_VERSION_LIST 2 NUMPY_VERSION_PATCH)
+string(REGEX MATCH "[0-9]*" NUMPY_VERSION_PATCH ${NUMPY_VERSION_PATCH})
+math(EXPR NUMPY_VERSION_DECIMAL
+ "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
+
+find_package_message(NUMPY
+ "Found NumPy: version \"${NUMPY_VERSION}\" ${NUMPY_INCLUDE_DIRS}"
+ "${NUMPY_INCLUDE_DIRS}${NUMPY_VERSION}")
+
+set(NUMPY_FOUND TRUE)
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/cmake_modules/FindPythonLibsNew.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindPythonLibsNew.cmake b/cpp/cmake_modules/FindPythonLibsNew.cmake
new file mode 100644
index 0000000..1000a95
--- /dev/null
+++ b/cpp/cmake_modules/FindPythonLibsNew.cmake
@@ -0,0 +1,241 @@
+# - Find python libraries
+# This module finds the libraries corresponding to the Python interpreter
+# FindPythonInterp provides.
+# This code sets the following variables:
+#
+# PYTHONLIBS_FOUND - have the Python libs been found
+# PYTHON_PREFIX - path to the Python installation
+# PYTHON_LIBRARIES - path to the python library
+# PYTHON_INCLUDE_DIRS - path to where Python.h is found
+# PYTHON_SITE_PACKAGES - path to installation site-packages
+# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build
+#
+# PYTHON_INCLUDE_PATH - path to where Python.h is found (deprecated)
+#
+# A function PYTHON_ADD_MODULE(<name> src1 src2 ... srcN) is defined
+# to build modules for python.
+#
+# Thanks to talljimbo for the patch adding the 'LDVERSION' config
+# variable usage.
+
+#=============================================================================
+# Copyright 2001-2009 Kitware, Inc.
+# Copyright 2012-2014 Continuum Analytics, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the names of Kitware, Inc., the Insight Software Consortium,
+# nor the names of their contributors may be used to endorse or promote
+# products derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+# License text for the above reference.)
+
+# Use the Python interpreter to find the libs.
+if(PythonLibsNew_FIND_REQUIRED)
+ find_package(PythonInterp REQUIRED)
+else()
+ find_package(PythonInterp)
+endif()
+
+if(NOT PYTHONINTERP_FOUND)
+ set(PYTHONLIBS_FOUND FALSE)
+ return()
+endif()
+
+# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
+# testing whether sys has the gettotalrefcount function is a reliable,
+# cross-platform way to detect a CPython debug interpreter.
+#
+# The library suffix is from the config var LDVERSION sometimes, otherwise
+# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
+#
+# The config var LIBPL is for Linux, and helps on Debian Jessie where the
+# addition of multi-arch support shuffled things around.
+execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+ "from distutils import sysconfig as s;import sys;import struct;
+print('.'.join(str(v) for v in sys.version_info));
+print(sys.prefix);
+print(s.get_python_inc(plat_specific=True));
+print(s.get_python_lib(plat_specific=True));
+print(s.get_config_var('SO'));
+print(hasattr(sys, 'gettotalrefcount')+0);
+print(struct.calcsize('@P'));
+print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
+print(s.get_config_var('LIBPL'));
+"
+ RESULT_VARIABLE _PYTHON_SUCCESS
+ OUTPUT_VARIABLE _PYTHON_VALUES
+ ERROR_VARIABLE _PYTHON_ERROR_VALUE
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# _PYTHON_SUCCESS is the RESULT_VARIABLE of the execute_process() call above.
+# Compare it numerically: a regex "MATCHES 0" would also accept any value
+# that merely contains a "0" (for example exit code 10).
+if(NOT _PYTHON_SUCCESS EQUAL 0)
+  if(PythonLibsNew_FIND_REQUIRED)
+    message(FATAL_ERROR
+      "Python config failure:\n${_PYTHON_ERROR_VALUE}")
+  endif()
+  # Optional lookup: report "not found" and stop processing this module.
+  set(PYTHONLIBS_FOUND FALSE)
+  return()
+endif()
+
+# Convert the process output into a list
+string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
+string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
+list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
+list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
+list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
+list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
+list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
+list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
+list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
+list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
+list(GET _PYTHON_VALUES 8 PYTHON_LIBRARY_PATH)
+
+# Make sure the Python has the same pointer-size as the chosen compiler
+# Skip the check on OS X, it doesn't consistently have CMAKE_SIZEOF_VOID_P defined
+if((NOT APPLE) AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
+ if(PythonLibsNew_FIND_REQUIRED)
+ math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
+ math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
+ message(FATAL_ERROR
+ "Python config failure: Python is ${_PYTHON_BITS}-bit, "
+ "chosen compiler is ${_CMAKE_BITS}-bit")
+ endif()
+ set(PYTHONLIBS_FOUND FALSE)
+ return()
+endif()
+
+# The built-in FindPython didn't always give the version numbers
+string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST})
+list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
+list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
+list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
+
+# Make sure all directory separators are '/'
+string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
+string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
+string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES})
+
+if(CMAKE_HOST_WIN32)
+ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
+ set(PYTHON_LIBRARY
+ "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
+ else()
+ set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/libpython${PYTHON_LIBRARY_SUFFIX}.a")
+ endif()
+elseif(APPLE)
+ # Seems to require "-undefined dynamic_lookup" instead of linking
+ # against the .dylib, otherwise it crashes. This flag is added
+ # below
+ set(PYTHON_LIBRARY "")
+ #set(PYTHON_LIBRARY
+ # "${PYTHON_PREFIX}/lib/libpython${PYTHON_LIBRARY_SUFFIX}.dylib")
+else()
+ if(${PYTHON_SIZEOF_VOID_P} MATCHES 8)
+ set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib64" "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}")
+ else()
+ set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}")
+ endif()
+ message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
+ message(STATUS "Looking for python${PYTHON_LIBRARY_SUFFIX}")
+ # Probably this needs to be more involved. It would be nice if the config
+ # information the python interpreter itself gave us were more complete.
+ find_library(PYTHON_LIBRARY
+ NAMES "python${PYTHON_LIBRARY_SUFFIX}"
+ PATHS ${_PYTHON_LIBS_SEARCH}
+ NO_SYSTEM_ENVIRONMENT_PATH)
+ message(STATUS "Found Python lib ${PYTHON_LIBRARY}")
+endif()
+
+# For backward compatibility, set PYTHON_INCLUDE_PATH, but make it internal.
+SET(PYTHON_INCLUDE_PATH "${PYTHON_INCLUDE_DIR}" CACHE INTERNAL
+ "Path to where Python.h is found (deprecated)")
+
+MARK_AS_ADVANCED(
+ PYTHON_LIBRARY
+ PYTHON_INCLUDE_DIR
+)
+
+# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
+# cache entries because they are meant to specify the location of a single
+# library. We now set the variables listed by the documentation for this
+# module.
+SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
+SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
+SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
+
+
+# Don't know how to get to this directory, just doing something simple :P
+#INCLUDE(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
+#FIND_PACKAGE_HANDLE_STANDARD_ARGS(PythonLibs DEFAULT_MSG PYTHON_LIBRARIES PYTHON_INCLUDE_DIRS)
+find_package_message(PYTHON
+ "Found PythonLibs: ${PYTHON_LIBRARY}"
+ "${PYTHON_EXECUTABLE}${PYTHON_VERSION}")
+
+
+# PYTHON_ADD_MODULE(<name> src1 src2 ... srcN) is used to build modules for python.
+#
+# Defines two cache options per module:
+#   PYTHON_ENABLE_MODULE_<name>        - build the module at all (default TRUE)
+#   PYTHON_MODULE_<name>_BUILD_SHARED  - build as a loadable MODULE library
+#                                        rather than STATIC (default: the
+#                                        TARGET_SUPPORTS_SHARED_LIBS property)
+# When enabled, <name> is appended to the global PY_MODULES_LIST property
+# (and to PY_STATIC_MODULES_LIST for static builds) and a library target
+# <name> is created from the given sources.
+FUNCTION(PYTHON_ADD_MODULE _NAME )
+ GET_PROPERTY(_TARGET_SUPPORTS_SHARED_LIBS
+ GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
+ OPTION(PYTHON_ENABLE_MODULE_${_NAME} "Add module ${_NAME}" TRUE)
+ OPTION(PYTHON_MODULE_${_NAME}_BUILD_SHARED
+ "Add module ${_NAME} shared" ${_TARGET_SUPPORTS_SHARED_LIBS})
+
+ # Mark these options as advanced
+ MARK_AS_ADVANCED(PYTHON_ENABLE_MODULE_${_NAME}
+ PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+
+ IF(PYTHON_ENABLE_MODULE_${_NAME})
+ # Choose the library type and record static modules separately.
+ IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+ SET(PY_MODULE_TYPE MODULE)
+ ELSE(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+ SET(PY_MODULE_TYPE STATIC)
+ SET_PROPERTY(GLOBAL APPEND PROPERTY PY_STATIC_MODULES_LIST ${_NAME})
+ ENDIF(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+
+ SET_PROPERTY(GLOBAL APPEND PROPERTY PY_MODULES_LIST ${_NAME})
+ ADD_LIBRARY(${_NAME} ${PY_MODULE_TYPE} ${ARGN})
+ IF(APPLE)
+ # On OS X, linking against the Python libraries causes
+ # segfaults, so do this dynamic lookup instead.
+ SET_TARGET_PROPERTIES(${_NAME} PROPERTIES LINK_FLAGS
+ "-undefined dynamic_lookup")
+ ELSE()
+ # In general, we should not link against libpython as we do not embed
+ # the Python interpreter. The python binary itself can then define where
+ # the symbols should be loaded from.
+ # NOTE(review): "-undefined dynamic_lookup" is an Apple linker option;
+ # confirm the intended effect of this flag with the non-Apple linkers in use.
+ SET_TARGET_PROPERTIES(${_NAME} PROPERTIES LINK_FLAGS
+ "-Wl,-undefined,dynamic_lookup")
+ ENDIF()
+ # Shared modules take their file name prefix/suffix from
+ # PYTHON_MODULE_PREFIX and PYTHON_MODULE_EXTENSION; static builds keep
+ # the CMake defaults (the ELSE branch is intentionally empty).
+ IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+ SET_TARGET_PROPERTIES(${_NAME} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}")
+ SET_TARGET_PROPERTIES(${_NAME} PROPERTIES SUFFIX "${PYTHON_MODULE_EXTENSION}")
+ ELSE()
+ ENDIF()
+
+ ENDIF(PYTHON_ENABLE_MODULE_${_NAME})
+ENDFUNCTION(PYTHON_ADD_MODULE)
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
new file mode 100644
index 0000000..03f5afc
--- /dev/null
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+
+#######################################
+# arrow_python
+#######################################
+
+# Static helper library built from util/test_main.cc; it is the first entry of
+# the test link list below (presumably it supplies the gtest entry point).
+if (ARROW_BUILD_TESTS)
+ add_library(arrow_python_test_main STATIC
+ util/test_main.cc)
+
+ if (APPLE)
+ target_link_libraries(arrow_python_test_main
+ gtest
+ dl)
+ # Leave Python symbols unresolved at link time on OS X
+ set_target_properties(arrow_python_test_main
+ PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+ else()
+ target_link_libraries(arrow_python_test_main
+ gtest
+ pthread
+ dl
+ )
+ endif()
+endif()
+
+# Libraries every arrow_python test executable links against
+set(ARROW_PYTHON_MIN_TEST_LIBS
+ arrow_python_test_main
+ arrow_python_static
+ arrow_ipc_static
+ arrow_io_static
+ arrow_static)
+
+# Outside of OS X the tests additionally link the Python library itself
+if(NOT APPLE AND ARROW_BUILD_TESTS)
+ ADD_THIRDPARTY_LIB(python
+ SHARED_LIB "${PYTHON_LIBRARIES}")
+ list(APPEND ARROW_PYTHON_MIN_TEST_LIBS python)
+endif()
+
+set(ARROW_PYTHON_TEST_LINK_LIBS ${ARROW_PYTHON_MIN_TEST_LIBS})
+
+# ----------------------------------------------------------------------
+
+# Sources compiled into the arrow_python library
+set(ARROW_PYTHON_SRCS
+ builtin_convert.cc
+ common.cc
+ config.cc
+ helpers.cc
+ io.cc
+ pandas_convert.cc
+)
+
+# Arrow shared libraries the shared arrow_python library links against
+set(ARROW_PYTHON_SHARED_LINK_LIBS
+ arrow_io_shared
+ arrow_ipc_shared
+ arrow_shared
+)
+
+# NOTE(review): STATIC_LINK_LIBS reuses ARROW_IO_SHARED_PRIVATE_LINK_LIBS, which
+# is defined outside this file -- confirm that this is the intended list.
+ADD_ARROW_LIB(arrow_python
+ SOURCES ${ARROW_PYTHON_SRCS}
+ SHARED_LINK_FLAGS ""
+ SHARED_LINK_LIBS ${ARROW_PYTHON_SHARED_LINK_LIBS}
+ STATIC_LINK_LIBS ${ARROW_IO_SHARED_PRIVATE_LINK_LIBS}
+)
+
+# Public headers, installed under <includedir>/arrow/python
+install(FILES
+ api.h
+ builtin_convert.h
+ common.h
+ config.h
+ do_import_numpy.h
+ helpers.h
+ io.h
+ numpy_interop.h
+ pandas_convert.h
+ type_traits.h
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/python")
+
+# set_target_properties(arrow_python_shared PROPERTIES
+# INSTALL_RPATH "\$ORIGIN")
+
+# Unit test target for the pandas conversion code
+if (ARROW_BUILD_TESTS)
+ ADD_ARROW_TEST(pandas-test
+ STATIC_LINK_LIBS "${ARROW_PYTHON_TEST_LINK_LIBS}")
+endif()
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/api.h b/cpp/src/arrow/python/api.h
new file mode 100644
index 0000000..f4f1c0c
--- /dev/null
+++ b/cpp/src/arrow/python/api.h
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_PYTHON_API_H
+#define ARROW_PYTHON_API_H
+
+#include "arrow/python/builtin_convert.h"
+#include "arrow/python/common.h"
+#include "arrow/python/helpers.h"
+#include "arrow/python/io.h"
+#include "arrow/python/pandas_convert.h"
+
+#endif // ARROW_PYTHON_API_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
new file mode 100644
index 0000000..9acccc1
--- /dev/null
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -0,0 +1,527 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <Python.h>
+#include <datetime.h>
+#include <sstream>
+
+#include "arrow/python/builtin_convert.h"
+
+#include "arrow/api.h"
+#include "arrow/status.h"
+
+#include "arrow/python/helpers.h"
+#include "arrow/python/util/datetime.h"
+
+namespace arrow {
+namespace py {
+
+// True when obj is a Python integer: a `long`, or additionally an `int` when
+// building against Python 2.
+static inline bool IsPyInteger(PyObject* obj) {
+  bool is_integer = PyLong_Check(obj);
+#if PYARROW_IS_PY2
+  is_integer = is_integer || PyInt_Check(obj);
+#endif
+  return is_integer;
+}
+
+// Tallies the kinds of Python scalars it is shown so that one Arrow type can
+// be picked for all of them afterwards.
+class ScalarVisitor {
+ public:
+  ScalarVisitor() = default;
+
+  // Classify one object and bump the matching counter. Objects of an
+  // unrecognised kind only increase the total count.
+  void Visit(PyObject* obj) {
+    ++total_count_;
+    if (obj == Py_None) {
+      ++none_count_;
+      return;
+    }
+    if (PyBool_Check(obj)) {
+      ++bool_count_;
+      return;
+    }
+    if (PyFloat_Check(obj)) {
+      ++float_count_;
+      return;
+    }
+    if (IsPyInteger(obj)) {
+      ++int_count_;
+      return;
+    }
+    if (PyDate_CheckExact(obj)) {
+      ++date_count_;
+      return;
+    }
+    if (PyDateTime_CheckExact(obj)) {
+      ++timestamp_count_;
+      return;
+    }
+    if (PyBytes_Check(obj)) {
+      ++binary_count_;
+      return;
+    }
+    if (PyUnicode_Check(obj)) {
+      ++unicode_count_;
+      return;
+    }
+    // TODO(wesm): accumulate error information somewhere
+  }
+
+  // Pick the Arrow type for everything seen so far. The first non-zero
+  // counter in the order below wins; null() is returned when none is set.
+  std::shared_ptr<DataType> GetType() {
+    // TODO(wesm): handling mixed-type cases
+    if (float_count_) { return float64(); }
+    // TODO(wesm): tighter type later
+    if (int_count_) { return int64(); }
+    if (date_count_) { return date64(); }
+    if (timestamp_count_) { return timestamp(TimeUnit::MICRO); }
+    if (bool_count_) { return boolean(); }
+    if (binary_count_) { return binary(); }
+    if (unicode_count_) { return utf8(); }
+    return null();
+  }
+
+  // Number of objects passed to Visit()
+  int64_t total_count() const { return total_count_; }
+
+ private:
+  int64_t total_count_ = 0;
+  int64_t none_count_ = 0;
+  int64_t bool_count_ = 0;
+  int64_t int_count_ = 0;
+  int64_t date_count_ = 0;
+  int64_t timestamp_count_ = 0;
+  int64_t float_count_ = 0;
+  int64_t binary_count_ = 0;
+  int64_t unicode_count_ = 0;
+
+  // Place to accumulate errors
+  // std::vector<Status> errors_;
+};
+
+// Deepest list nesting that type inference can describe; also the number of
+// buckets in SeqVisitor's nesting histogram.
+static constexpr int MAX_NESTING_LEVELS = 32;
+
+// Walks a (possibly nested) Python sequence, feeding every non-None scalar to
+// a ScalarVisitor and recording at which nesting depth scalars were found.
+class SeqVisitor {
+ public:
+  // Value-initialising the array member zeroes every histogram bucket.
+  SeqVisitor() : max_nesting_level_(0), nesting_histogram_() {}
+
+  // Visit all items of `obj`, which sits at nesting depth `level`.
+  //
+  // Returns Invalid when the nesting is deeper than MAX_NESTING_LEVELS,
+  // NotImplemented when a dict is encountered, and an error when an item
+  // cannot be fetched from the sequence.
+  Status Visit(PyObject* obj, int level = 0) {
+    // nesting_histogram_ is indexed by level, so refuse anything that would
+    // fall outside of it instead of writing past the end of the array.
+    if (level >= MAX_NESTING_LEVELS) {
+      return Status::Invalid("Sequence nesting exceeds the supported depth");
+    }
+
+    Py_ssize_t size = PySequence_Size(obj);
+
+    if (level > max_nesting_level_) { max_nesting_level_ = level; }
+
+    for (int64_t i = 0; i < size; ++i) {
+      // TODO(wesm): Specialize for PyList_GET_ITEM?
+      OwnedRef item_ref(PySequence_GetItem(obj, i));
+      PyObject* item = item_ref.obj();
+
+      // PySequence_GetItem returns NULL on failure; the type checks below
+      // must not be handed a null pointer.
+      if (item == nullptr) {
+        RETURN_IF_PYERROR();
+        return Status::UnknownError("Could not read item from sequence");
+      }
+
+      if (PyList_Check(item)) {
+        RETURN_NOT_OK(Visit(item, level + 1));
+      } else if (PyDict_Check(item)) {
+        return Status::NotImplemented("No type inference for dicts");
+      } else {
+        // We permit nulls at any level of nesting
+        if (item == Py_None) {
+          // TODO
+        } else {
+          ++nesting_histogram_[level];
+          scalars_.Visit(item);
+        }
+      }
+    }
+    return Status::OK();
+  }
+
+  // Type inferred from the visited items: the scalar type wrapped in one
+  // ListType per nesting level. With no scalars seen this is null() for a
+  // flat sequence and nullptr (undetermined) for a nested one.
+  std::shared_ptr<DataType> GetType() {
+    if (scalars_.total_count() == 0) {
+      if (max_nesting_level_ == 0) {
+        return null();
+      } else {
+        return nullptr;
+      }
+    } else {
+      std::shared_ptr<DataType> result = scalars_.GetType();
+      for (int i = 0; i < max_nesting_level_; ++i) {
+        result = std::make_shared<ListType>(result);
+      }
+      return result;
+    }
+  }
+
+  // Returns Invalid when scalars were found at more than one nesting depth,
+  // or at a depth shallower than the deepest list seen.
+  Status Validate() const {
+    if (scalars_.total_count() > 0) {
+      if (num_nesting_levels() > 1) {
+        return Status::Invalid("Mixed nesting levels not supported");
+      } else if (max_observed_level() < max_nesting_level_) {
+        return Status::Invalid("Mixed nesting levels not supported");
+      }
+    }
+    return Status::OK();
+  }
+
+  // Deepest level at which at least one scalar was recorded (0 if none)
+  int max_observed_level() const {
+    int result = 0;
+    for (int i = 0; i < MAX_NESTING_LEVELS; ++i) {
+      if (nesting_histogram_[i] > 0) { result = i; }
+    }
+    return result;
+  }
+
+  // Number of distinct levels at which scalars were recorded
+  int num_nesting_levels() const {
+    int result = 0;
+    for (int i = 0; i < MAX_NESTING_LEVELS; ++i) {
+      if (nesting_histogram_[i] > 0) { ++result; }
+    }
+    return result;
+  }
+
+ private:
+  ScalarVisitor scalars_;
+
+  // Deepest level passed to Visit(), and the number of scalars seen per level
+  int max_nesting_level_;
+  int nesting_histogram_[MAX_NESTING_LEVELS];
+};
+
+// Non-exhaustive type inference for a Python sequence.
+//
+// On success *size holds the sequence length and *out_type the inferred Arrow
+// type. Returns TypeError when obj is not a sequence or when no type can be
+// determined, and forwards any error reported while visiting the items.
+Status InferArrowType(PyObject* obj, int64_t* size, std::shared_ptr<DataType>* out_type) {
+  *size = PySequence_Size(obj);
+  if (PyErr_Occurred()) {
+    // Not a sequence
+    PyErr_Clear();
+    return Status::TypeError("Object is not a sequence");
+  }
+
+  // For 0-length sequences, refuse to guess: report the null type right away
+  // rather than running the visitor over nothing to get the same answer.
+  if (*size == 0) {
+    *out_type = null();
+    return Status::OK();
+  }
+
+  SeqVisitor seq_visitor;
+  RETURN_NOT_OK(seq_visitor.Visit(obj));
+  RETURN_NOT_OK(seq_visitor.Validate());
+
+  *out_type = seq_visitor.GetType();
+
+  if (*out_type == nullptr) { return Status::TypeError("Unable to determine data type"); }
+
+  return Status::OK();
+}
+
+// Marshal Python sequence (list, tuple, etc.) to Arrow array.
+//
+// Base class of the per-type converters: Init() hands the converter the
+// builder to fill, AppendData() appends every item of a sequence to it.
+class SeqConverter {
+ public:
+  // Converters are handled through base-class pointers, so make destruction
+  // through such a pointer well-defined.
+  virtual ~SeqConverter() = default;
+
+  // Remember the builder that AppendData() will write to
+  virtual Status Init(const std::shared_ptr<ArrayBuilder>& builder) {
+    builder_ = builder;
+    return Status::OK();
+  }
+
+  // Append all items of `seq` to the builder given to Init()
+  virtual Status AppendData(PyObject* seq) = 0;
+
+ protected:
+  std::shared_ptr<ArrayBuilder> builder_;
+};
+
+// SeqConverter that additionally keeps its builder downcast to BuilderType, so
+// subclasses can call the type-specific Append overloads directly.
+template <typename BuilderType>
+class TypedConverter : public SeqConverter {
+ public:
+  Status Init(const std::shared_ptr<ArrayBuilder>& builder) override {
+    // Unchecked downcast: the builder is assumed to really be a BuilderType
+    typed_builder_ = static_cast<BuilderType*>(builder.get());
+    return SeqConverter::Init(builder);
+  }
+
+ protected:
+  // Non-owning view of builder_ with its concrete type
+  BuilderType* typed_builder_;
+};
+
+// Converts a sequence of bool / None objects to a boolean array. None becomes
+// a null; any object other than Py_True is stored as false.
+class BoolConverter : public TypedConverter<BooleanBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      // Builder failures are propagated rather than dropped, as the
+      // string converters in this file already do.
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        RETURN_NOT_OK(typed_builder_->Append(item.obj() == Py_True));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Converts a sequence of Python integers / None objects to an int64 array.
+// Returns an error when an item cannot be converted by PyLong_AsLongLong.
+class Int64Converter : public TypedConverter<Int64Builder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        const int64_t val = PyLong_AsLongLong(item.obj());
+        // A failed conversion is reported through the Python error indicator
+        RETURN_IF_PYERROR();
+        RETURN_NOT_OK(typed_builder_->Append(val));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Converts a sequence of datetime.date / None objects to a date64 array using
+// PyDate_to_ms for the value of each date.
+class DateConverter : public TypedConverter<Date64Builder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        // Items are assumed to be date objects; no type check is done here
+        PyDateTime_Date* pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
+        RETURN_NOT_OK(typed_builder_->Append(PyDate_to_ms(pydate)));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Converts a sequence of datetime.datetime / None objects to a timestamp
+// array holding microseconds since 1970-01-01 00:00:00.
+//
+// NOTE(review): both the value and the epoch go through mktime(), which
+// interprets its argument as local time -- confirm that this is intended.
+class TimestampConverter : public TypedConverter<TimestampBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+
+    // The reference point is the same for every item, so convert it once
+    struct tm epoch = {0};
+    epoch.tm_year = 70;
+    epoch.tm_mday = 1;
+    const time_t epoch_time = mktime(&epoch);
+
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        PyDateTime_DateTime* pydatetime =
+            reinterpret_cast<PyDateTime_DateTime*>(item.obj());
+        struct tm datetime = {0};
+        datetime.tm_year = PyDateTime_GET_YEAR(pydatetime) - 1900;
+        datetime.tm_mon = PyDateTime_GET_MONTH(pydatetime) - 1;
+        datetime.tm_mday = PyDateTime_GET_DAY(pydatetime);
+        datetime.tm_hour = PyDateTime_DATE_GET_HOUR(pydatetime);
+        datetime.tm_min = PyDateTime_DATE_GET_MINUTE(pydatetime);
+        datetime.tm_sec = PyDateTime_DATE_GET_SECOND(pydatetime);
+        int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
+        RETURN_IF_PYERROR();
+        // Microseconds since the epoch. The seconds are widened to 64 bits
+        // before scaling: `long` may be 32 bits wide, which overflows.
+        const int64_t seconds =
+            static_cast<int64_t>(llrint(difftime(mktime(&datetime), epoch_time)));
+        const int64_t val = seconds * 1000000 + us;
+        RETURN_NOT_OK(typed_builder_->Append(val));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Converts a sequence of Python numbers / None objects to a float64 array.
+// Returns an error when an item cannot be converted by PyFloat_AsDouble.
+class DoubleConverter : public TypedConverter<DoubleBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        const double val = PyFloat_AsDouble(item.obj());
+        // A failed conversion is reported through the Python error indicator
+        RETURN_IF_PYERROR();
+        RETURN_NOT_OK(typed_builder_->Append(val));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Converts a sequence of bytes / unicode / None objects to a binary array.
+// Unicode items are stored as their UTF-8 encoding; any other item type is
+// rejected with a TypeError status.
+class BytesConverter : public TypedConverter<BinaryBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    // Owns the most recent temporary UTF-8 encoding while its bytes are copied
+    OwnedRef encoded;
+    Py_ssize_t num_items = PySequence_Size(seq);
+    for (int64_t idx = 0; idx < num_items; ++idx) {
+      PyObject* element = PySequence_GetItem(seq, idx);
+      OwnedRef element_ref(element);
+
+      if (element == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+        continue;
+      }
+
+      // Object whose byte payload gets appended
+      PyObject* payload;
+      if (PyUnicode_Check(element)) {
+        encoded.reset(PyUnicode_AsUTF8String(element));
+        RETURN_IF_PYERROR();
+        payload = encoded.obj();
+      } else if (PyBytes_Check(element)) {
+        payload = element;
+      } else {
+        return Status::TypeError("Non-string value encountered");
+      }
+
+      // No error checking
+      int64_t num_bytes = PyBytes_GET_SIZE(payload);
+      const char* raw = PyBytes_AS_STRING(payload);
+      RETURN_NOT_OK(typed_builder_->Append(raw, num_bytes));
+    }
+    return Status::OK();
+  }
+};
+
+// Converts a sequence of unicode / None objects to a string array holding the
+// UTF-8 encoding of each item. Non-unicode items are rejected with a
+// TypeError status.
+class UTF8Converter : public TypedConverter<StringBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    // Owns the most recent temporary UTF-8 encoding while its bytes are copied
+    OwnedRef encoded;
+    Py_ssize_t num_items = PySequence_Size(seq);
+    for (int64_t idx = 0; idx < num_items; ++idx) {
+      PyObject* element = PySequence_GetItem(seq, idx);
+      OwnedRef element_ref(element);
+
+      if (element == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+        continue;
+      }
+      if (!PyUnicode_Check(element)) {
+        return Status::TypeError("Non-unicode value encountered");
+      }
+
+      encoded.reset(PyUnicode_AsUTF8String(element));
+      RETURN_IF_PYERROR();
+      PyObject* payload = encoded.obj();
+
+      // No error checking
+      int64_t num_bytes = PyBytes_GET_SIZE(payload);
+      const char* raw = PyBytes_AS_STRING(payload);
+      RETURN_NOT_OK(typed_builder_->Append(raw, num_bytes));
+    }
+    return Status::OK();
+  }
+};
+
+// Converts a sequence of sequences / None objects to a list array. Each
+// non-None item opens a new list slot whose values are appended by the
+// converter for the list's value type.
+class ListConverter : public TypedConverter<ListBuilder> {
+ public:
+  // Also creates and initialises the converter for the value type
+  Status Init(const std::shared_ptr<ArrayBuilder>& builder) override;
+
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        // Start a new list slot; checked just like AppendNull() above
+        RETURN_NOT_OK(typed_builder_->Append());
+        RETURN_NOT_OK(value_converter_->AppendData(item.obj()));
+      }
+    }
+    return Status::OK();
+  }
+
+ protected:
+  // Converter for the items of each list; set by Init()
+  std::shared_ptr<SeqConverter> value_converter_;
+};
+
+// Dynamic constructor for sequence converters: returns a new, uninitialised
+// converter for the given type, or nullptr when the type has no converter.
+std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
+  switch (type->type) {
+    // Fixed-width scalars
+    case Type::BOOL:
+      return std::make_shared<BoolConverter>();
+    case Type::INT64:
+      return std::make_shared<Int64Converter>();
+    case Type::DOUBLE:
+      return std::make_shared<DoubleConverter>();
+
+    // Temporal types
+    case Type::DATE64:
+      return std::make_shared<DateConverter>();
+    case Type::TIMESTAMP:
+      return std::make_shared<TimestampConverter>();
+
+    // Variable-length and nested types
+    case Type::BINARY:
+      return std::make_shared<BytesConverter>();
+    case Type::STRING:
+      return std::make_shared<UTF8Converter>();
+    case Type::LIST:
+      return std::make_shared<ListConverter>();
+
+    // Structs and everything else are not supported
+    case Type::STRUCT:
+    default:
+      return nullptr;
+  }
+}
+
+// Bind the list builder and set up the converter for the list's value type.
+// Returns NotImplemented when the value type has no converter, and forwards
+// any failure from initialising the value converter.
+Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
+  builder_ = builder;
+  typed_builder_ = static_cast<ListBuilder*>(builder.get());
+
+  value_converter_ =
+      GetConverter(static_cast<ListType*>(builder->type().get())->value_type());
+  if (value_converter_ == nullptr) {
+    return Status::NotImplemented("value type not implemented");
+  }
+
+  // Must not be dropped: a nested list converter whose own Init() failed has
+  // no value converter and would dereference a null pointer in AppendData().
+  return value_converter_->Init(typed_builder_->value_builder());
+}
+
+// Append the items of the Python sequence `obj` to `builder`, converting them
+// as values of `type`. Returns NotImplemented when no converter exists for
+// the type or for a nested value type.
+Status AppendPySequence(PyObject* obj, const std::shared_ptr<DataType>& type,
+    const std::shared_ptr<ArrayBuilder>& builder) {
+  std::shared_ptr<SeqConverter> converter = GetConverter(type);
+  if (converter == nullptr) {
+    std::stringstream ss;
+    ss << "No type converter implemented for " << type->ToString();
+    return Status::NotImplemented(ss.str());
+  }
+  // Init() can fail (e.g. unsupported list value type); do not carry on with
+  // a half-initialised converter.
+  RETURN_NOT_OK(converter->Init(builder));
+
+  return converter->AppendData(obj);
+}
+
+// Convert a Python sequence to an Arrow array: infer the type, then build the
+// array with memory from `pool`. A sequence whose inferred type is null
+// becomes a NullArray of the same length.
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out) {
+  std::shared_ptr<DataType> inferred_type;
+  int64_t length;
+  // The datetime C API macros used during conversion require this import
+  PyDateTime_IMPORT;
+  RETURN_NOT_OK(InferArrowType(obj, &length, &inferred_type));
+
+  if (inferred_type->type != Type::NA) {
+    // Give the sequence converter an array builder
+    std::shared_ptr<ArrayBuilder> builder;
+    RETURN_NOT_OK(MakeBuilder(pool, inferred_type, &builder));
+    RETURN_NOT_OK(AppendPySequence(obj, inferred_type, builder));
+    return builder->Finish(out);
+  }
+
+  // Handle NA / NullType case
+  out->reset(new NullArray(length));
+  return Status::OK();
+}
+
+} // namespace py
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/builtin_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h
new file mode 100644
index 0000000..7b50990
--- /dev/null
+++ b/cpp/src/arrow/python/builtin_convert.h
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Functions for converting between CPython built-in data structures and Arrow
+// data structures
+
+#ifndef ARROW_PYTHON_ADAPTERS_BUILTIN_H
+#define ARROW_PYTHON_ADAPTERS_BUILTIN_H
+
+#include <Python.h>
+
+#include <memory>
+
+#include <arrow/type.h>
+
+#include "arrow/util/visibility.h"
+
+#include "arrow/python/common.h"
+
+namespace arrow {
+
+class Array;
+class Status;
+
+namespace py {
+
+// Infer an Arrow type for the items of the Python sequence `obj`. The length
+// of the sequence is written to `size` and the type to `out_type`.
+ARROW_EXPORT arrow::Status InferArrowType(
+ PyObject* obj, int64_t* size, std::shared_ptr<arrow::DataType>* out_type);
+
+// Append the items of the Python sequence `obj` to `builder`, converting them
+// as values of `type`.
+ARROW_EXPORT arrow::Status AppendPySequence(PyObject* obj,
+ const std::shared_ptr<arrow::DataType>& type,
+ const std::shared_ptr<arrow::ArrayBuilder>& builder);
+
+// Convert the Python sequence `obj` to a new Arrow array of inferred type,
+// taking memory from `pool`; the result is written to `out`.
+ARROW_EXPORT
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out);
+
+} // namespace py
+} // namespace arrow
+
+#endif // ARROW_PYTHON_ADAPTERS_BUILTIN_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/common.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
new file mode 100644
index 0000000..a5aea30
--- /dev/null
+++ b/cpp/src/arrow/python/common.cc
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/common.h"
+
+#include <cstdlib>
+#include <mutex>
+#include <sstream>
+
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+
+namespace arrow {
+namespace py {
+
+static std::mutex memory_pool_mutex;
+static MemoryPool* default_python_pool = nullptr;
+
+// Register `pool` as the pool returned by get_memory_pool(). Passing nullptr
+// makes get_memory_pool() fall back to default_memory_pool() again.
+void set_default_memory_pool(MemoryPool* pool) {
+  std::lock_guard<std::mutex> lock(memory_pool_mutex);
+  default_python_pool = pool;
+}
+
+// Return the pool registered through set_default_memory_pool(), or
+// default_memory_pool() when none is registered.
+MemoryPool* get_memory_pool() {
+  std::lock_guard<std::mutex> lock(memory_pool_mutex);
+  return default_python_pool ? default_python_pool : default_memory_pool();
+}
+
+// ----------------------------------------------------------------------
+// PyBuffer
+
+// Wrap the memory exposed by `obj` through the buffer protocol. The buffer
+// keeps a memoryview of `obj` alive for as long as it exists.
+//
+// When `obj` does not support the buffer protocol, or no memoryview can be
+// created for it, the result is an empty buffer (null data, size 0); in the
+// latter case the Python error is left set for the caller.
+PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) {
+  if (!PyObject_CheckBuffer(obj)) { return; }
+
+  // PyMemoryView_FromObject returns a new reference. That reference is the
+  // one owned by this buffer and dropped in the destructor; taking a second
+  // one here would keep the memoryview alive forever.
+  obj_ = PyMemoryView_FromObject(obj);
+  if (obj_ == nullptr) { return; }
+
+  Py_buffer* buffer = PyMemoryView_GET_BUFFER(obj_);
+  data_ = reinterpret_cast<const uint8_t*>(buffer->buf);
+  size_ = buffer->len;
+  capacity_ = buffer->len;
+  is_mutable_ = false;
+}
+
+// Release the memoryview. The GIL is acquired first because the buffer may be
+// destroyed by code that does not hold it.
+PyBuffer::~PyBuffer() {
+  PyAcquireGIL lock;
+  // obj_ is null when construction did not yield a memoryview
+  Py_XDECREF(obj_);
+}
+
+} // namespace py
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
new file mode 100644
index 0000000..f1be471
--- /dev/null
+++ b/cpp/src/arrow/python/common.h
@@ -0,0 +1,139 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_PYTHON_COMMON_H
+#define ARROW_PYTHON_COMMON_H
+
+#include <string>
+
+#include "arrow/python/config.h"
+
+#include "arrow/buffer.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace py {
+
+// Scope guard for the Python GIL: PyGILState_Ensure() on construction and the
+// matching PyGILState_Release() on destruction.
+class PyAcquireGIL {
+ public:
+  PyAcquireGIL() : state_(PyGILState_Ensure()) {}
+
+  ~PyAcquireGIL() { PyGILState_Release(state_); }
+
+ private:
+  // Token returned by PyGILState_Ensure(), handed back on release
+  PyGILState_STATE state_;
+  DISALLOW_COPY_AND_ASSIGN(PyAcquireGIL);
+};
+
+// True when building against Python 2. Parenthesised so that the expansion
+// stays one operand inside larger expressions such as `!PYARROW_IS_PY2`.
+#define PYARROW_IS_PY2 (PY_MAJOR_VERSION <= 2)
+
+// Holder for one owned reference to a Python object (possibly none). The
+// reference is dropped when the holder is destroyed or given another object.
+//
+// NOTE(review): the class is copyable, and a copy would drop the same
+// reference a second time -- confirm that no caller copies it.
+class OwnedRef {
+ public:
+  OwnedRef() : obj_(nullptr) {}
+
+  // Takes over the caller's reference to `obj`; the count is not increased
+  explicit OwnedRef(PyObject* obj) : obj_(obj) {}
+
+  // Acquires the GIL before dropping the reference
+  ~OwnedRef() {
+    PyAcquireGIL lock;
+    Py_XDECREF(obj_);
+  }
+
+  // Drop the current reference, if any, and take over the one to `obj`.
+  // Unlike the destructor this does not acquire the GIL itself.
+  void reset(PyObject* obj) {
+    Py_XDECREF(obj_);
+    obj_ = obj;
+  }
+
+  // Forget the object without dropping the reference
+  void release() { obj_ = nullptr; }
+
+  // Borrowed pointer to the held object (nullptr when empty)
+  PyObject* obj() const { return obj_; }
+
+ private:
+  PyObject* obj_;
+};
+
+// Exposes a Python object as a C string.
+//
+// `bytes` points at the UTF-8 encoding of a unicode object, at the contents
+// of a bytes object, or at the str() of any other object. It is nullptr when
+// `obj` is null or the conversion failed. The pointer is valid only while
+// this struct is alive and, for bytes input, while `obj` itself is alive.
+struct PyObjectStringify {
+  // Owns the temporary bytes object, if one had to be created
+  OwnedRef tmp_obj;
+  const char* bytes;
+
+  explicit PyObjectStringify(PyObject* obj) : bytes(nullptr) {
+    if (obj == nullptr) { return; }
+
+    PyObject* bytes_obj;
+    if (PyUnicode_Check(obj)) {
+      bytes_obj = PyUnicode_AsUTF8String(obj);
+      tmp_obj.reset(bytes_obj);
+    } else if (PyBytes_Check(obj)) {
+      // Borrowed from the caller, nothing to own
+      bytes_obj = obj;
+    } else {
+      // Arbitrary object (e.g. an exception instance): go through str(),
+      // which yields unicode on Python 3 and bytes on Python 2.
+      PyObject* str_obj = PyObject_Str(obj);
+      if (str_obj != nullptr && PyUnicode_Check(str_obj)) {
+        bytes_obj = PyUnicode_AsUTF8String(str_obj);
+        Py_DECREF(str_obj);
+      } else {
+        bytes_obj = str_obj;
+      }
+      tmp_obj.reset(bytes_obj);
+    }
+
+    // A failed conversion leaves bytes_obj null and `bytes` unset
+    if (bytes_obj != nullptr) { bytes = PyBytes_AsString(bytes_obj); }
+  }
+};
+
+// If a Python error is pending, clear it and return from the enclosing
+// function with Status::UnknownError carrying the error's message. A generic
+// message is used when the error has no value or cannot be stringified.
+// TODO(wesm): We can just let errors pass through. To be explored later
+#define RETURN_IF_PYERROR()                               \
+  if (PyErr_Occurred()) {                                 \
+    PyObject *exc_type, *exc_value, *traceback;           \
+    PyErr_Fetch(&exc_type, &exc_value, &traceback);       \
+    std::string message("Unknown Python error");          \
+    {                                                     \
+      PyObjectStringify stringified(exc_value);           \
+      if (stringified.bytes != nullptr) {                 \
+        message = stringified.bytes;                      \
+      }                                                   \
+    }                                                     \
+    Py_XDECREF(exc_type);                                 \
+    Py_XDECREF(exc_value);                                \
+    Py_XDECREF(traceback);                                \
+    PyErr_Clear();                                        \
+    return Status::UnknownError(message);                 \
+  }
+
+// Accessors for the common PyArrow memory pool.
+// set_default_memory_pool() registers the pool to use; get_memory_pool()
+// returns the registered pool.
+ARROW_EXPORT void set_default_memory_pool(MemoryPool* pool);
+ARROW_EXPORT MemoryPool* get_memory_pool();
+
+// Buffer viewing the data of a NumPy array. The array is kept alive by a
+// reference held for the lifetime of the buffer.
+class ARROW_EXPORT NumPyBuffer : public Buffer {
+ public:
+  explicit NumPyBuffer(PyArrayObject* arr) : Buffer(nullptr, 0), arr_(arr) {
+    // Hold our own reference to the array
+    Py_INCREF(arr_);
+
+    data_ = reinterpret_cast<const uint8_t*>(PyArray_DATA(arr_));
+    // Size in bytes: number of elements times the size of one element
+    size_ = PyArray_SIZE(arr_) * PyArray_DESCR(arr_)->elsize;
+    capacity_ = size_;
+  }
+
+  virtual ~NumPyBuffer() { Py_XDECREF(arr_); }
+
+ private:
+  PyArrayObject* arr_;
+};
+
+// Buffer backed by a Python object that supports the buffer protocol.
+class ARROW_EXPORT PyBuffer : public Buffer {
+ public:
+ /// Note that the GIL must be held when calling the PyBuffer constructor.
+ ///
+ /// While memoryview objects support multi-dimensional buffers, PyBuffer only supports
+ /// one-dimensional byte buffers.
+ explicit PyBuffer(PyObject* obj);
+ ~PyBuffer();
+
+ private:
+ // Python object whose reference is released in the destructor
+ PyObject* obj_;
+};
+
+} // namespace py
+} // namespace arrow
+
+#endif // ARROW_PYTHON_COMMON_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/config.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/config.cc b/cpp/src/arrow/python/config.cc
new file mode 100644
index 0000000..2abc4dd
--- /dev/null
+++ b/cpp/src/arrow/python/config.cc
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <Python.h>
+
+#include "arrow/python/config.h"
+
+namespace arrow {
+namespace py {
+
+// Library initialisation hook; currently there is nothing to initialise.
+void Init() {}
+
+// Object registered through set_numpy_nan(); nullptr until then
+PyObject* numpy_nan = nullptr;
+
+// Register `obj` as numpy_nan, holding a reference to it. A previously
+// registered object is released; passing nullptr clears the registration.
+void set_numpy_nan(PyObject* obj) {
+  // Take the new reference before dropping the old one, so that
+  // re-registering the same object never lets its count reach zero.
+  Py_XINCREF(obj);
+  PyObject* previous = numpy_nan;
+  numpy_nan = obj;
+  Py_XDECREF(previous);
+}
+
+} // namespace py
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/config.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/config.h b/cpp/src/arrow/python/config.h
new file mode 100644
index 0000000..dd554e0
--- /dev/null
+++ b/cpp/src/arrow/python/config.h
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_PYTHON_CONFIG_H
+#define ARROW_PYTHON_CONFIG_H
+
+#include <Python.h>
+
+#include "arrow/python/numpy_interop.h"
+#include "arrow/util/visibility.h"
+
+// On Python 3, PyString_Check is made an alias of PyUnicode_Check
+#if PY_MAJOR_VERSION >= 3
+#define PyString_Check PyUnicode_Check
+#endif
+
+namespace arrow {
+namespace py {
+
+// Python object registered through set_numpy_nan()
+ARROW_EXPORT
+extern PyObject* numpy_nan;
+
+// Library initialisation hook
+ARROW_EXPORT
+void Init();
+
+// Register the object to be stored in numpy_nan
+ARROW_EXPORT
+void set_numpy_nan(PyObject* obj);
+
+} // namespace py
+} // namespace arrow
+
+#endif // ARROW_PYTHON_CONFIG_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/do_import_numpy.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/do_import_numpy.h b/cpp/src/arrow/python/do_import_numpy.h
new file mode 100644
index 0000000..bb4a382
--- /dev/null
+++ b/cpp/src/arrow/python/do_import_numpy.h
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Trick borrowed from dynd-python for initializing the NumPy array API
+
+// Trigger the array import (inversion of NO_IMPORT_ARRAY)
+#define NUMPY_IMPORT_ARRAY
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/helpers.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
new file mode 100644
index 0000000..add2d9a
--- /dev/null
+++ b/cpp/src/arrow/python/helpers.cc
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/helpers.h"
+
+#include <arrow/api.h>
+
+namespace arrow {
+namespace py {
+
+// Returns the DataType produced by the parameter-free factory function that
+// corresponds to `type`. Type ids that are not listed below yield nullptr, so
+// callers must check the result before dereferencing it.
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
+  switch (type) {
+    case Type::NA:
+      return null();
+    case Type::UINT8:
+      return uint8();
+    case Type::INT8:
+      return int8();
+    case Type::UINT16:
+      return uint16();
+    case Type::INT16:
+      return int16();
+    case Type::UINT32:
+      return uint32();
+    case Type::INT32:
+      return int32();
+    case Type::UINT64:
+      return uint64();
+    case Type::INT64:
+      return int64();
+    case Type::DATE32:
+      return date32();
+    case Type::DATE64:
+      return date64();
+    case Type::BOOL:
+      return boolean();
+    case Type::FLOAT:
+      return float32();
+    case Type::DOUBLE:
+      return float64();
+    case Type::BINARY:
+      return binary();
+    case Type::STRING:
+      return utf8();
+    default:
+      return nullptr;
+  }
+}
+
+} // namespace py
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/helpers.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
new file mode 100644
index 0000000..611e814
--- /dev/null
+++ b/cpp/src/arrow/python/helpers.h
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_HELPERS_H
+#define PYARROW_HELPERS_H
+
+#include <memory>
+
+#include "arrow/type.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace py {
+
+// Returns the DataType for a primitive type id. The definition in helpers.cc
+// returns nullptr for ids it does not handle, so callers should check the
+// result.
+ARROW_EXPORT
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
+
+} // namespace py
+} // namespace arrow
+
+#endif // PYARROW_HELPERS_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/io.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc
new file mode 100644
index 0000000..ba82a45
--- /dev/null
+++ b/cpp/src/arrow/python/io.cc
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/io.h"
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+
+#include "arrow/io/memory.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+
+#include "arrow/python/common.h"
+
+namespace arrow {
+namespace py {
+
+// ----------------------------------------------------------------------
+// Python file
+
+// Stores `file` and takes a reference to it. Py_INCREF does not accept NULL,
+// so `file` must be a valid object.
+PythonFile::PythonFile(PyObject* file) : file_(file) {
+ Py_INCREF(file_);
+}
+
+// Releases the reference taken by the constructor.
+PythonFile::~PythonFile() {
+ Py_DECREF(file_);
+}
+
+// Translates a pending Python exception into an Arrow status.
+//
+// Returns Status::OK() when no exception is pending. Otherwise the exception
+// is fetched (which clears the interpreter's error indicator), its value is
+// stringified, and the text is returned inside Status::IOError.
+static Status CheckPyError() {
+  if (!PyErr_Occurred()) { return Status::OK(); }
+
+  PyObject* exc_type;
+  PyObject* exc_value;
+  PyObject* traceback;
+  PyErr_Fetch(&exc_type, &exc_value, &traceback);
+
+  // Copy the text into a std::string before the references are dropped
+  PyObjectStringify stringified(exc_value);
+  std::string message(stringified.bytes);
+
+  Py_XDECREF(exc_type);
+  Py_XDECREF(exc_value);
+  Py_XDECREF(traceback);
+  PyErr_Clear();
+  return Status::IOError(message);
+}
+
+// This is annoying: because C++11 does not allow implicit conversion of string
+// literals to non-const char*, we need to go through some gymnastics to use
+// PyObject_CallMethod without a lot of pain (its arguments are non-const
+// char*)
+//
+// Thin wrapper: const_casts `method_name` and `argspec` and forwards `args`
+// unchanged to PyObject_CallMethod, returning its result as-is. The
+// arguments travel through C varargs, so the C type of each one must match
+// its format unit in `argspec` exactly; no conversion is performed here.
+template <typename... ArgTypes>
+static inline PyObject* cpp_PyObject_CallMethod(
+ PyObject* obj, const char* method_name, const char* argspec, ArgTypes... args) {
+ return PyObject_CallMethod(
+ obj, const_cast<char*>(method_name), const_cast<char*>(argspec), args...);
+}
+
+// Invokes close() on the wrapped Python object. The call's return value is
+// discarded; a raised Python exception is reported through the returned
+// Status.
+Status PythonFile::Close() {
+  PyObject* ret = cpp_PyObject_CallMethod(file_, "close", "()");
+  Py_XDECREF(ret);
+  return CheckPyError();
+}
+
+// Invokes seek(position, whence) on the wrapped Python object.
+//
+// whence: 0 for relative to start of file, 2 for end of file
+//
+// The offset is passed with the "L" (long long) format unit. With "i" the
+// varargs reader would fetch a C int although a 64-bit value was passed,
+// which is undefined behaviour and cannot represent offsets above 2^31 - 1.
+//
+// The call's return value is discarded; a raised Python exception is reported
+// through the returned Status.
+Status PythonFile::Seek(int64_t position, int whence) {
+  PyObject* result = cpp_PyObject_CallMethod(
+      file_, "seek", "(Li)", static_cast<long long>(position), whence);
+  Py_XDECREF(result);
+  ARROW_RETURN_NOT_OK(CheckPyError());
+  return Status::OK();
+}
+
+// Invokes read(nbytes) on the wrapped Python object.
+//
+// On success `*out` receives the object returned by read(); the caller owns
+// that reference and must release it. On failure `*out` is left untouched
+// and the Python exception is reported through the returned Status.
+//
+// The count is passed with the "L" (long long) format unit so that the type
+// read from the varargs matches the 64-bit value that is passed.
+Status PythonFile::Read(int64_t nbytes, PyObject** out) {
+  PyObject* result =
+      cpp_PyObject_CallMethod(file_, "read", "(L)", static_cast<long long>(nbytes));
+  ARROW_RETURN_NOT_OK(CheckPyError());
+  *out = result;
+  return Status::OK();
+}
+
+// Copies `nbytes` bytes starting at `data` into a new Python bytes object and
+// passes it to write() on the wrapped Python object. The value returned by
+// write() is discarded; a Python exception raised at either step is reported
+// through the returned Status.
+Status PythonFile::Write(const uint8_t* data, int64_t nbytes) {
+  PyObject* payload =
+      PyBytes_FromStringAndSize(reinterpret_cast<const char*>(data), nbytes);
+  ARROW_RETURN_NOT_OK(CheckPyError());
+
+  PyObject* ret = cpp_PyObject_CallMethod(file_, "write", "(O)", payload);
+  Py_XDECREF(payload);
+  Py_XDECREF(ret);
+  return CheckPyError();
+}
+
+// Invokes tell() on the wrapped Python object and stores the result, converted
+// with PyLong_AsLongLong, in `*position`. Errors from the call or from the
+// conversion are reported through the returned Status.
+Status PythonFile::Tell(int64_t* position) {
+  PyObject* py_offset = cpp_PyObject_CallMethod(file_, "tell", "()");
+  ARROW_RETURN_NOT_OK(CheckPyError());
+
+  *position = PyLong_AsLongLong(py_offset);
+  Py_DECREF(py_offset);
+
+  // PyLong_AsLongLong can raise OverflowError, hence the second check
+  return CheckPyError();
+}
+
+// ----------------------------------------------------------------------
+// Seekable input stream
+
+// Wraps `file` in a heap-allocated PythonFile, which takes its own reference
+// to the object.
+PyReadableFile::PyReadableFile(PyObject* file) : file_(new PythonFile(file)) {}
+
+PyReadableFile::~PyReadableFile() {}
+
+// Closes the wrapped Python file. A PyAcquireGIL guard is held for the
+// duration of the call, as PythonFile requires the GIL for all its methods.
+Status PyReadableFile::Close() {
+ PyAcquireGIL lock;
+ return file_->Close();
+}
+
+// Moves the file position to `position`, measured from the start of the file
+// (whence = 0), while holding a PyAcquireGIL guard.
+Status PyReadableFile::Seek(int64_t position) {
+ PyAcquireGIL lock;
+ return file_->Seek(position, 0);
+}
+
+// Stores the wrapped file's current position in `*position`, while holding a
+// PyAcquireGIL guard.
+Status PyReadableFile::Tell(int64_t* position) {
+ PyAcquireGIL lock;
+ return file_->Tell(position);
+}
+
+// Reads up to `nbytes` bytes from the wrapped Python file into `out` and
+// stores the number of bytes actually copied in `*bytes_read`.
+//
+// `out` must have room for at least `nbytes` bytes. An IOError status is
+// returned, and nothing is copied, when read() raises, when it returns an
+// object that is not `bytes` (e.g. a file opened in text mode), or when it
+// returns more data than was requested.
+Status PyReadableFile::Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) {
+  PyAcquireGIL lock;
+  PyObject* bytes_obj = nullptr;
+  ARROW_RETURN_NOT_OK(file_->Read(nbytes, &bytes_obj));
+
+  // PyBytes_GET_SIZE and PyBytes_AS_STRING perform no type checking
+  if (!PyBytes_Check(bytes_obj)) {
+    Py_DECREF(bytes_obj);
+    return Status::IOError("Python file read() did not return a bytes object");
+  }
+
+  const int64_t size = PyBytes_GET_SIZE(bytes_obj);
+  // Guard the caller's buffer against a misbehaving file object
+  if (nbytes >= 0 && size > nbytes) {
+    Py_DECREF(bytes_obj);
+    return Status::IOError("Python file read() returned more bytes than requested");
+  }
+
+  *bytes_read = size;
+  std::memcpy(out, PyBytes_AS_STRING(bytes_obj), static_cast<size_t>(size));
+  Py_DECREF(bytes_obj);
+
+  return Status::OK();
+}
+
+// Reads up to `nbytes` bytes through the wrapped Python file's read() and
+// returns the resulting object wrapped in a PyBuffer via `*out`.
+Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr<Buffer>* out) {
+ PyAcquireGIL lock;
+
+ PyObject* bytes_obj;
+ ARROW_RETURN_NOT_OK(file_->Read(nbytes, &bytes_obj));
+
+ // NOTE(review): the local reference is released right after this, so
+ // PyBuffer presumably takes its own reference to bytes_obj -- confirm in
+ // arrow/python/common.h.
+ *out = std::make_shared<PyBuffer>(bytes_obj);
+ Py_DECREF(bytes_obj);
+
+ return Status::OK();
+}
+
+// Determines the size of the wrapped file by seeking to its end and reading
+// the resulting offset, then seeks back to the position the file had on
+// entry. The result is stored in `*size`; `*size` is only written when every
+// step succeeded.
+Status PyReadableFile::GetSize(int64_t* size) {
+  PyAcquireGIL lock;
+
+  int64_t current_position;
+  ARROW_RETURN_NOT_OK(file_->Tell(&current_position));
+
+  // whence = 2: offset is relative to the end of the file
+  ARROW_RETURN_NOT_OK(file_->Seek(0, 2));
+
+  int64_t file_size;
+  ARROW_RETURN_NOT_OK(file_->Tell(&file_size));
+
+  // Restore previous file position
+  ARROW_RETURN_NOT_OK(file_->Seek(current_position, 0));
+
+  *size = file_size;
+  return Status::OK();
+}
+
+// Always reports false for this file type.
+bool PyReadableFile::supports_zero_copy() const {
+ return false;
+}
+
+// ----------------------------------------------------------------------
+// Output stream
+
+// Wraps `file` in a heap-allocated PythonFile, which takes its own reference
+// to the object.
+PyOutputStream::PyOutputStream(PyObject* file) : file_(new PythonFile(file)) {}
+
+PyOutputStream::~PyOutputStream() {}
+
+// Closes the wrapped Python file while holding a PyAcquireGIL guard.
+Status PyOutputStream::Close() {
+ PyAcquireGIL lock;
+ return file_->Close();
+}
+
+// Stores the wrapped file's current position in `*position`, while holding a
+// PyAcquireGIL guard.
+Status PyOutputStream::Tell(int64_t* position) {
+ PyAcquireGIL lock;
+ return file_->Tell(position);
+}
+
+// Writes `nbytes` bytes starting at `data` to the wrapped Python file, while
+// holding a PyAcquireGIL guard.
+Status PyOutputStream::Write(const uint8_t* data, int64_t nbytes) {
+ PyAcquireGIL lock;
+ return file_->Write(data, nbytes);
+}
+
+// ----------------------------------------------------------------------
+// A readable file that is backed by a PyBuffer
+
+// Constructs the io::BufferReader base over a PyBuffer created from `obj`.
+// NOTE(review): which Python object types PyBuffer accepts is not visible
+// here -- see arrow/python/common.h.
+PyBytesReader::PyBytesReader(PyObject* obj)
+ : io::BufferReader(std::make_shared<PyBuffer>(obj)) {}
+
+PyBytesReader::~PyBytesReader() {}
+
+} // namespace py
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/io.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/io.h b/cpp/src/arrow/python/io.h
new file mode 100644
index 0000000..905bd6c
--- /dev/null
+++ b/cpp/src/arrow/python/io.h
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_IO_H
+#define PYARROW_IO_H
+
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/visibility.h"
+
+#include "arrow/python/config.h"
+
+#include "arrow/python/common.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace py {
+
+// A common interface to a Python file-like object. Must acquire GIL before
+// calling any methods
+//
+// NOTE(review): the implementation in io.cc increments the object's
+// reference count in the constructor and decrements it in the destructor,
+// but copying is not disabled here; a copied instance would release the
+// reference twice. The visible users hold it through std::unique_ptr.
+class PythonFile {
+ public:
+ // Takes a reference to `file`, released again by the destructor
+ explicit PythonFile(PyObject* file);
+ ~PythonFile();
+
+ // Each method calls the same-named method of the Python object and
+ // reports a raised Python exception as an error Status.
+ Status Close();
+ // whence: 0 for relative to start of file, 2 for end of file
+ Status Seek(int64_t position, int whence);
+ // On success `*out` receives the object returned by read(); the caller
+ // owns that reference
+ Status Read(int64_t nbytes, PyObject** out);
+ Status Tell(int64_t* position);
+ Status Write(const uint8_t* data, int64_t nbytes);
+
+ private:
+ PyObject* file_;
+};
+
+// An io::RandomAccessFile implemented on top of a Python file-like object.
+// The methods defined in io.cc hold a PyAcquireGIL guard while they call into
+// the wrapped object.
+class ARROW_EXPORT PyReadableFile : public io::RandomAccessFile {
+ public:
+ explicit PyReadableFile(PyObject* file);
+ virtual ~PyReadableFile();
+
+ Status Close() override;
+
+ // Copies what the Python read() returned into `out` and reports the number
+ // of bytes copied through `bytes_read`
+ Status Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) override;
+ // Returns what the Python read() returned, wrapped in a PyBuffer
+ Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override;
+
+ // Implemented by seeking to the end of the file and back
+ Status GetSize(int64_t* size) override;
+
+ // `position` is measured from the start of the file
+ Status Seek(int64_t position) override;
+
+ Status Tell(int64_t* position) override;
+
+ // Always false
+ bool supports_zero_copy() const override;
+
+ private:
+ std::unique_ptr<PythonFile> file_;
+};
+
+// An io::OutputStream implemented on top of a Python file-like object. The
+// methods defined in io.cc hold a PyAcquireGIL guard while they call into the
+// wrapped object.
+class ARROW_EXPORT PyOutputStream : public io::OutputStream {
+ public:
+ explicit PyOutputStream(PyObject* file);
+ virtual ~PyOutputStream();
+
+ Status Close() override;
+ Status Tell(int64_t* position) override;
+ Status Write(const uint8_t* data, int64_t nbytes) override;
+
+ private:
+ std::unique_ptr<PythonFile> file_;
+};
+
+// A zero-copy reader backed by a PyBuffer object. The constructor in io.cc
+// builds the PyBuffer from `obj` and hands it to the io::BufferReader base.
+class ARROW_EXPORT PyBytesReader : public io::BufferReader {
+ public:
+ explicit PyBytesReader(PyObject* obj);
+ virtual ~PyBytesReader();
+};
+
+// TODO(wesm): seekable output files
+
+} // namespace py
+} // namespace arrow
+
+#endif // PYARROW_IO_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/numpy_interop.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/numpy_interop.h b/cpp/src/arrow/python/numpy_interop.h
new file mode 100644
index 0000000..0a4b425
--- /dev/null
+++ b/cpp/src/arrow/python/numpy_interop.h
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_NUMPY_INTEROP_H
+#define PYARROW_NUMPY_INTEROP_H
+
+#include <Python.h>
+
+#include <numpy/numpyconfig.h>
+
+// Don't use the deprecated Numpy functions
+#ifdef NPY_1_7_API_VERSION
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#else
+#define NPY_ARRAY_NOTSWAPPED NPY_NOTSWAPPED
+#define NPY_ARRAY_ALIGNED NPY_ALIGNED
+#define NPY_ARRAY_WRITEABLE NPY_WRITEABLE
+#define NPY_ARRAY_UPDATEIFCOPY NPY_UPDATEIFCOPY
+#endif
+
+// This is required to be able to access the NumPy C API properly in C++ files
+// other than this main one
+#define PY_ARRAY_UNIQUE_SYMBOL arrow_ARRAY_API
+#ifndef NUMPY_IMPORT_ARRAY
+#define NO_IMPORT_ARRAY
+#endif
+
+#include <numpy/arrayobject.h>
+#include <numpy/ufuncobject.h>
+
+namespace arrow {
+namespace py {
+
+// Initializes the NumPy array and ufunc C APIs, but only in a translation
+// unit compiled with NUMPY_IMPORT_ARRAY defined (see do_import_numpy.h);
+// everywhere else the body is empty and 0 is returned.
+//
+// import_array1 / import_umath1 are NumPy macros that return their argument
+// (-1 here) from the enclosing function when the import fails.
+inline int import_numpy() {
+#ifdef NUMPY_IMPORT_ARRAY
+ import_array1(-1);
+ import_umath1(-1);
+#endif
+
+ return 0;
+}
+
+} // namespace py
+} // namespace arrow
+
+#endif // PYARROW_NUMPY_INTEROP_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/pandas-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas-test.cc b/cpp/src/arrow/python/pandas-test.cc
new file mode 100644
index 0000000..ae2527e
--- /dev/null
+++ b/cpp/src/arrow/python/pandas-test.cc
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/python/pandas_convert.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+namespace arrow {
+namespace py {
+
+// Builds three utf8 columns whose values are the single byte 0xF1 (a lone
+// lead byte, i.e. not valid UTF-8) and checks that converting the table to
+// pandas reports UnknownError.
+TEST(PandasConversionTest, TestObjectBlockWriteFails) {
+  StringBuilder builder(default_memory_pool());
+  const char value[] = {'\xf1', '\0'};
+
+  for (int i = 0; i < 1000; ++i) {
+    ASSERT_OK(builder.Append(value, strlen(value)));
+  }
+
+  std::shared_ptr<Array> arr;
+  ASSERT_OK(builder.Finish(&arr));
+
+  auto f1 = field("f1", utf8());
+  auto f2 = field("f2", utf8());
+  auto f3 = field("f3", utf8());
+  std::vector<std::shared_ptr<Field>> fields = {f1, f2, f3};
+  std::vector<std::shared_ptr<Column>> cols = {std::make_shared<Column>(f1, arr),
+      std::make_shared<Column>(f2, arr), std::make_shared<Column>(f3, arr)};
+
+  auto schema = std::make_shared<Schema>(fields);
+  auto table = std::make_shared<Table>("", schema, cols);
+
+  PyObject* out = nullptr;
+  Status status;
+
+  // The conversion runs with the GIL released. The assertion is made only
+  // after Py_END_ALLOW_THREADS: a failing ASSERT_* returns from the test body,
+  // which inside the block would skip restoring the thread state.
+  Py_BEGIN_ALLOW_THREADS;
+  status = ConvertTableToPandas(table, 2, &out);
+  Py_END_ALLOW_THREADS;
+
+  ASSERT_RAISES(UnknownError, status);
+}
+
+} // namespace py
+} // namespace arrow