You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/26 15:45:45 UTC
[3/5] arrow git commit: ARROW-341: [Python] Move pyarrow's C++ code
to the main C++ source tree, install libarrow_python and headers
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index ef874e3..35a1a89 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -47,9 +47,6 @@ endif()
# Top level cmake dir
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
- option(PYARROW_BUILD_TESTS
- "Build the PyArrow C++ googletest unit tests"
- OFF)
option(PYARROW_BUILD_PARQUET
"Build the PyArrow Parquet integration"
OFF)
@@ -57,7 +54,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
"Build the PyArrow jemalloc integration"
OFF)
option(PYARROW_BUNDLE_ARROW_CPP
- "Bundle the Arrow C++ libraries"
+ "Bundle the Arrow C++ libraries"
OFF)
endif()
@@ -75,6 +72,8 @@ endif(CCACHE_FOUND)
# Compiler flags
############################################################
+include(BuildUtils)
+include(CompilerInfo)
include(SetupCxxFlags)
# Add common flags
@@ -86,8 +85,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
# Suppress Cython warnings
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable")
-# Determine compiler version
-include(CompilerInfo)
if ("${COMPILER_FAMILY}" STREQUAL "clang")
# Using Clang with ccache causes a bunch of spurious warnings that are
@@ -216,115 +213,8 @@ include_directories(SYSTEM
src)
############################################################
-# Testing
-############################################################
-
-# Add a new test case, with or without an executable that should be built.
-#
-# REL_TEST_NAME is the name of the test. It may be a single component
-# (e.g. monotime-test) or contain additional components (e.g.
-# net/net_util-test). Either way, the last component must be a globally
-# unique name.
-#
-# Arguments after the test name will be passed to set_tests_properties().
-function(ADD_PYARROW_TEST REL_TEST_NAME)
- if(NO_TESTS)
- return()
- endif()
- get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)
-
- if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME}.cc)
- # This test has a corresponding .cc file, set it up as an executable.
- set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}")
- add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc")
- target_link_libraries(${TEST_NAME} ${PYARROW_TEST_LINK_LIBS})
- else()
- # No executable, just invoke the test (probably a script) directly.
- set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME})
- endif()
-
- add_test(${TEST_NAME}
- ${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH})
- if(ARGN)
- set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
- endif()
-endfunction()
-
-# A wrapper for add_dependencies() that is compatible with NO_TESTS.
-function(ADD_PYARROW_TEST_DEPENDENCIES REL_TEST_NAME)
- if(NO_TESTS)
- return()
- endif()
- get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)
-
- add_dependencies(${TEST_NAME} ${ARGN})
-endfunction()
-
-enable_testing()
-
-############################################################
# Dependencies
############################################################
-function(ADD_THIRDPARTY_LIB LIB_NAME)
- set(options)
- set(one_value_args SHARED_LIB STATIC_LIB)
- set(multi_value_args DEPS)
- cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
- if(ARG_UNPARSED_ARGUMENTS)
- message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
- endif()
-
- if(("${PYARROW_LINK}" STREQUAL "s" AND ARG_STATIC_LIB) OR (NOT ARG_SHARED_LIB))
- if(NOT ARG_STATIC_LIB)
- message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
- endif()
- add_library(${LIB_NAME} STATIC IMPORTED)
- set_target_properties(${LIB_NAME}
- PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
- message(STATUS "Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
- else()
- add_library(${LIB_NAME} SHARED IMPORTED)
- set_target_properties(${LIB_NAME}
- PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
- message(STATUS "Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
- endif()
-
- if(ARG_DEPS)
- set_target_properties(${LIB_NAME}
- PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
- endif()
-
- # Set up an "exported variant" for this thirdparty library (see "Visibility"
- # above). It's the same as the real target, just with an "_exported" suffix.
- # We prefer the static archive if it exists (as it's akin to an "internal"
- # library), but we'll settle for the shared object if we must.
- #
- # A shared object exported variant will force any "leaf" library that
- # transitively depends on it to also depend on it at runtime; this is
- # desirable for some libraries (e.g. cyrus_sasl).
- set(LIB_NAME_EXPORTED ${LIB_NAME}_exported)
- if(ARG_STATIC_LIB)
- add_library(${LIB_NAME_EXPORTED} STATIC IMPORTED)
- set_target_properties(${LIB_NAME_EXPORTED}
- PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
- else()
- add_library(${LIB_NAME_EXPORTED} SHARED IMPORTED)
- set_target_properties(${LIB_NAME_EXPORTED}
- PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
- endif()
- if(ARG_DEPS)
- set_target_properties(${LIB_NAME_EXPORTED}
- PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
- endif()
-endfunction()
-
-## GMock
-if (PYARROW_BUILD_TESTS)
- find_package(GTest REQUIRED)
- include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
- ADD_THIRDPARTY_LIB(gtest
- STATIC_LIB ${GTEST_STATIC_LIB})
-endif()
## Parquet
find_package(Parquet)
@@ -352,6 +242,8 @@ if (PYARROW_BUNDLE_ARROW_CPP)
COPYONLY)
SET(ARROW_IPC_SHARED_LIB
${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_ipc${CMAKE_SHARED_LIBRARY_SUFFIX})
+ SET(ARROW_PYTHON_SHARED_LIB
+ ${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_python${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
ADD_THIRDPARTY_LIB(arrow
@@ -360,66 +252,8 @@ ADD_THIRDPARTY_LIB(arrow_io
SHARED_LIB ${ARROW_IO_SHARED_LIB})
ADD_THIRDPARTY_LIB(arrow_ipc
SHARED_LIB ${ARROW_IPC_SHARED_LIB})
-
-############################################################
-# Linker setup
-############################################################
-
-set(PYARROW_MIN_TEST_LIBS
- pyarrow_test_main
- pyarrow)
-
-set(PYARROW_MIN_TEST_LIBS
- pyarrow_test_main
- pyarrow
- ${PYARROW_BASE_LIBS})
-
-if(NOT APPLE AND PYARROW_BUILD_TESTS)
- ADD_THIRDPARTY_LIB(python
- SHARED_LIB "${PYTHON_LIBRARIES}")
- list(APPEND PYARROW_MIN_TEST_LIBS python)
-endif()
-
-set(PYARROW_TEST_LINK_LIBS ${PYARROW_MIN_TEST_LIBS})
-
-############################################################
-# "make ctags" target
-############################################################
-if (UNIX)
- add_custom_target(ctags ctags -R --languages=c++,c --exclude=thirdparty/installed)
-endif (UNIX)
-
-############################################################
-# "make etags" target
-############################################################
-if (UNIX)
- add_custom_target(tags etags --members --declarations
- `find ${CMAKE_CURRENT_SOURCE_DIR}/src
- -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or -name \\*.h -or -name \\*.c -or
- -name \\*.f`)
- add_custom_target(etags DEPENDS tags)
-endif (UNIX)
-
-############################################################
-# "make cscope" target
-############################################################
-if (UNIX)
- add_custom_target(cscope find ${CMAKE_CURRENT_SOURCE_DIR}
- ( -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or
- -name \\*.h -or -name \\*.c -or -name \\*.f )
- -exec echo \"{}\" \; > cscope.files && cscope -q -b VERBATIM)
-endif (UNIX)
-
-############################################################
-# "make lint" target
-############################################################
-if (UNIX)
- # Full lint
- add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
- --verbose=2
- --filter=-whitespace/comments,-readability/todo,-build/header_guard
- `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
-endif (UNIX)
+ADD_THIRDPARTY_LIB(arrow_python
+ SHARED_LIB ${ARROW_PYTHON_SHARED_LIB})
############################################################
# Subdirectories
@@ -429,9 +263,6 @@ if (UNIX)
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
endif()
-add_subdirectory(src/pyarrow)
-add_subdirectory(src/pyarrow/util)
-
set(CYTHON_EXTENSIONS
array
config
@@ -444,19 +275,11 @@ set(CYTHON_EXTENSIONS
table
)
-set(PYARROW_SRCS
- src/pyarrow/common.cc
- src/pyarrow/config.cc
- src/pyarrow/helpers.cc
- src/pyarrow/io.cc
- src/pyarrow/adapters/builtin.cc
- src/pyarrow/adapters/pandas.cc
-)
-
set(LINK_LIBS
- arrow
- arrow_io
- arrow_ipc
+ arrow_shared
+ arrow_io_shared
+ arrow_ipc_shared
+ arrow_python_shared
)
if (PYARROW_BUILD_PARQUET)
@@ -497,24 +320,12 @@ if (PYARROW_BUILD_JEMALLOC)
SHARED_LIB ${ARROW_JEMALLOC_SHARED_LIB})
set(LINK_LIBS
${LINK_LIBS}
- arrow_jemalloc)
+ arrow_jemalloc_shared)
set(CYTHON_EXTENSIONS
${CYTHON_EXTENSIONS}
jemalloc)
endif()
-add_library(pyarrow SHARED
- ${PYARROW_SRCS})
-if (PYARROW_BUNDLE_ARROW_CPP)
- set_target_properties(pyarrow PROPERTIES
- INSTALL_RPATH "\$ORIGIN")
-endif()
-target_link_libraries(pyarrow ${LINK_LIBS})
-
-if(APPLE)
- set_target_properties(pyarrow PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-endif()
-
############################################################
# Setup and build Cython modules
############################################################
@@ -555,5 +366,5 @@ foreach(module ${CYTHON_EXTENSIONS})
set_target_properties(${module_name} PROPERTIES
INSTALL_RPATH ${module_install_rpath})
- target_link_libraries(${module_name} pyarrow)
+ target_link_libraries(${module_name} ${LINK_LIBS})
endforeach(module)
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/cmake_modules/FindArrow.cmake
----------------------------------------------------------------------
diff --git a/python/cmake_modules/FindArrow.cmake b/python/cmake_modules/FindArrow.cmake
index 5d0207d..5030c9c 100644
--- a/python/cmake_modules/FindArrow.cmake
+++ b/python/cmake_modules/FindArrow.cmake
@@ -57,12 +57,18 @@ find_library(ARROW_JEMALLOC_LIB_PATH NAMES arrow_jemalloc
${ARROW_SEARCH_LIB_PATH}
NO_DEFAULT_PATH)
+find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python
+ PATHS
+ ${ARROW_SEARCH_LIB_PATH}
+ NO_DEFAULT_PATH)
+
if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH)
set(ARROW_FOUND TRUE)
set(ARROW_LIB_NAME libarrow)
set(ARROW_IO_LIB_NAME libarrow_io)
set(ARROW_IPC_LIB_NAME libarrow_ipc)
set(ARROW_JEMALLOC_LIB_NAME libarrow_jemalloc)
+ set(ARROW_PYTHON_LIB_NAME libarrow_python)
set(ARROW_LIBS ${ARROW_SEARCH_LIB_PATH})
set(ARROW_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_LIB_NAME}.a)
@@ -77,6 +83,9 @@ if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH)
set(ARROW_JEMALLOC_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_JEMALLOC_LIB_NAME}.a)
set(ARROW_JEMALLOC_SHARED_LIB ${ARROW_LIBS}/${ARROW_JEMALLOC_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+ set(ARROW_PYTHON_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_PYTHON_LIB_NAME}.a)
+ set(ARROW_PYTHON_SHARED_LIB ${ARROW_LIBS}/${ARROW_PYTHON_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+
if (NOT Arrow_FIND_QUIETLY)
message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}")
message(STATUS "Found the Arrow IO library: ${ARROW_IO_LIB_PATH}")
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/cmake_modules/FindNumPy.cmake
----------------------------------------------------------------------
diff --git a/python/cmake_modules/FindNumPy.cmake b/python/cmake_modules/FindNumPy.cmake
deleted file mode 100644
index 58bb531..0000000
--- a/python/cmake_modules/FindNumPy.cmake
+++ /dev/null
@@ -1,100 +0,0 @@
-# - Find the NumPy libraries
-# This module finds if NumPy is installed, and sets the following variables
-# indicating where it is.
-#
-# TODO: Update to provide the libraries and paths for linking npymath lib.
-#
-# NUMPY_FOUND - was NumPy found
-# NUMPY_VERSION - the version of NumPy found as a string
-# NUMPY_VERSION_MAJOR - the major version number of NumPy
-# NUMPY_VERSION_MINOR - the minor version number of NumPy
-# NUMPY_VERSION_PATCH - the patch version number of NumPy
-# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
-# NUMPY_INCLUDE_DIRS - path to the NumPy include files
-
-#============================================================================
-# Copyright 2012 Continuum Analytics, Inc.
-#
-# MIT License
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files
-# (the "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to permit
-# persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-#============================================================================
-
-# Finding NumPy involves calling the Python interpreter
-if(NumPy_FIND_REQUIRED)
- find_package(PythonInterp REQUIRED)
-else()
- find_package(PythonInterp)
-endif()
-
-if(NOT PYTHONINTERP_FOUND)
- set(NUMPY_FOUND FALSE)
- return()
-endif()
-
-execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
- "import numpy as n; print(n.__version__); print(n.get_include());"
- RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS
- OUTPUT_VARIABLE _NUMPY_VALUES_OUTPUT
- ERROR_VARIABLE _NUMPY_ERROR_VALUE
- OUTPUT_STRIP_TRAILING_WHITESPACE)
-
-if(NOT _NUMPY_SEARCH_SUCCESS MATCHES 0)
- if(NumPy_FIND_REQUIRED)
- message(FATAL_ERROR
- "NumPy import failure:\n${_NUMPY_ERROR_VALUE}")
- endif()
- set(NUMPY_FOUND FALSE)
- return()
-endif()
-
-# Convert the process output into a list
-string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES ${_NUMPY_VALUES_OUTPUT})
-string(REGEX REPLACE "\n" ";" _NUMPY_VALUES ${_NUMPY_VALUES})
-list(GET _NUMPY_VALUES 0 NUMPY_VERSION)
-list(GET _NUMPY_VALUES 1 NUMPY_INCLUDE_DIRS)
-
-string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" _VER_CHECK "${NUMPY_VERSION}")
-if("${_VER_CHECK}" STREQUAL "")
- # The output from Python was unexpected. Raise an error always
- # here, because we found NumPy, but it appears to be corrupted somehow.
- message(FATAL_ERROR
- "Requested version and include path from NumPy, got instead:\n${_NUMPY_VALUES_OUTPUT}\n")
- return()
-endif()
-
-# Make sure all directory separators are '/'
-string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS ${NUMPY_INCLUDE_DIRS})
-
-# Get the major and minor version numbers
-string(REGEX REPLACE "\\." ";" _NUMPY_VERSION_LIST ${NUMPY_VERSION})
-list(GET _NUMPY_VERSION_LIST 0 NUMPY_VERSION_MAJOR)
-list(GET _NUMPY_VERSION_LIST 1 NUMPY_VERSION_MINOR)
-list(GET _NUMPY_VERSION_LIST 2 NUMPY_VERSION_PATCH)
-string(REGEX MATCH "[0-9]*" NUMPY_VERSION_PATCH ${NUMPY_VERSION_PATCH})
-math(EXPR NUMPY_VERSION_DECIMAL
- "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
-
-find_package_message(NUMPY
- "Found NumPy: version \"${NUMPY_VERSION}\" ${NUMPY_INCLUDE_DIRS}"
- "${NUMPY_INCLUDE_DIRS}${NUMPY_VERSION}")
-
-set(NUMPY_FOUND TRUE)
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/cmake_modules/FindPythonLibsNew.cmake
----------------------------------------------------------------------
diff --git a/python/cmake_modules/FindPythonLibsNew.cmake b/python/cmake_modules/FindPythonLibsNew.cmake
deleted file mode 100644
index 1000a95..0000000
--- a/python/cmake_modules/FindPythonLibsNew.cmake
+++ /dev/null
@@ -1,241 +0,0 @@
-# - Find python libraries
-# This module finds the libraries corresponding to the Python interpreter
-# FindPythonInterp provides.
-# This code sets the following variables:
-#
-# PYTHONLIBS_FOUND - have the Python libs been found
-# PYTHON_PREFIX - path to the Python installation
-# PYTHON_LIBRARIES - path to the python library
-# PYTHON_INCLUDE_DIRS - path to where Python.h is found
-# PYTHON_SITE_PACKAGES - path to installation site-packages
-# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build
-#
-# PYTHON_INCLUDE_PATH - path to where Python.h is found (deprecated)
-#
-# A function PYTHON_ADD_MODULE(<name> src1 src2 ... srcN) is defined
-# to build modules for python.
-#
-# Thanks to talljimbo for the patch adding the 'LDVERSION' config
-# variable usage.
-
-#=============================================================================
-# Copyright 2001-2009 Kitware, Inc.
-# Copyright 2012-2014 Continuum Analytics, Inc.
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-#
-# * Neither the names of Kitware, Inc., the Insight Software Consortium,
-# nor the names of their contributors may be used to endorse or promote
-# products derived from this software without specific prior written
-# permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#=============================================================================
-# (To distribute this file outside of CMake, substitute the full
-# License text for the above reference.)
-
-# Use the Python interpreter to find the libs.
-if(PythonLibsNew_FIND_REQUIRED)
- find_package(PythonInterp REQUIRED)
-else()
- find_package(PythonInterp)
-endif()
-
-if(NOT PYTHONINTERP_FOUND)
- set(PYTHONLIBS_FOUND FALSE)
- return()
-endif()
-
-# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
-# testing whether sys has the gettotalrefcount function is a reliable,
-# cross-platform way to detect a CPython debug interpreter.
-#
-# The library suffix is from the config var LDVERSION sometimes, otherwise
-# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
-#
-# The config var LIBPL is for Linux, and helps on Debian Jessie where the
-# addition of multi-arch support shuffled things around.
-execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
- "from distutils import sysconfig as s;import sys;import struct;
-print('.'.join(str(v) for v in sys.version_info));
-print(sys.prefix);
-print(s.get_python_inc(plat_specific=True));
-print(s.get_python_lib(plat_specific=True));
-print(s.get_config_var('SO'));
-print(hasattr(sys, 'gettotalrefcount')+0);
-print(struct.calcsize('@P'));
-print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
-print(s.get_config_var('LIBPL'));
-"
- RESULT_VARIABLE _PYTHON_SUCCESS
- OUTPUT_VARIABLE _PYTHON_VALUES
- ERROR_VARIABLE _PYTHON_ERROR_VALUE
- OUTPUT_STRIP_TRAILING_WHITESPACE)
-
-if(NOT _PYTHON_SUCCESS MATCHES 0)
- if(PythonLibsNew_FIND_REQUIRED)
- message(FATAL_ERROR
- "Python config failure:\n${_PYTHON_ERROR_VALUE}")
- endif()
- set(PYTHONLIBS_FOUND FALSE)
- return()
-endif()
-
-# Convert the process output into a list
-string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
-string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
-list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
-list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
-list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
-list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
-list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
-list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
-list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
-list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
-list(GET _PYTHON_VALUES 8 PYTHON_LIBRARY_PATH)
-
-# Make sure the Python has the same pointer-size as the chosen compiler
-# Skip the check on OS X, it doesn't consistently have CMAKE_SIZEOF_VOID_P defined
-if((NOT APPLE) AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
- if(PythonLibsNew_FIND_REQUIRED)
- math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
- math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
- message(FATAL_ERROR
- "Python config failure: Python is ${_PYTHON_BITS}-bit, "
- "chosen compiler is ${_CMAKE_BITS}-bit")
- endif()
- set(PYTHONLIBS_FOUND FALSE)
- return()
-endif()
-
-# The built-in FindPython didn't always give the version numbers
-string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST})
-list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
-list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
-list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
-
-# Make sure all directory separators are '/'
-string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
-string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
-string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES})
-
-if(CMAKE_HOST_WIN32)
- if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
- set(PYTHON_LIBRARY
- "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
- else()
- set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/libpython${PYTHON_LIBRARY_SUFFIX}.a")
- endif()
-elseif(APPLE)
- # Seems to require "-undefined dynamic_lookup" instead of linking
- # against the .dylib, otherwise it crashes. This flag is added
- # below
- set(PYTHON_LIBRARY "")
- #set(PYTHON_LIBRARY
- # "${PYTHON_PREFIX}/lib/libpython${PYTHON_LIBRARY_SUFFIX}.dylib")
-else()
- if(${PYTHON_SIZEOF_VOID_P} MATCHES 8)
- set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib64" "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}")
- else()
- set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}")
- endif()
- message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
- message(STATUS "Looking for python${PYTHON_LIBRARY_SUFFIX}")
- # Probably this needs to be more involved. It would be nice if the config
- # information the python interpreter itself gave us were more complete.
- find_library(PYTHON_LIBRARY
- NAMES "python${PYTHON_LIBRARY_SUFFIX}"
- PATHS ${_PYTHON_LIBS_SEARCH}
- NO_SYSTEM_ENVIRONMENT_PATH)
- message(STATUS "Found Python lib ${PYTHON_LIBRARY}")
-endif()
-
-# For backward compatibility, set PYTHON_INCLUDE_PATH, but make it internal.
-SET(PYTHON_INCLUDE_PATH "${PYTHON_INCLUDE_DIR}" CACHE INTERNAL
- "Path to where Python.h is found (deprecated)")
-
-MARK_AS_ADVANCED(
- PYTHON_LIBRARY
- PYTHON_INCLUDE_DIR
-)
-
-# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
-# cache entries because they are meant to specify the location of a single
-# library. We now set the variables listed by the documentation for this
-# module.
-SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
-SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
-SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
-
-
-# Don't know how to get to this directory, just doing something simple :P
-#INCLUDE(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
-#FIND_PACKAGE_HANDLE_STANDARD_ARGS(PythonLibs DEFAULT_MSG PYTHON_LIBRARIES PYTHON_INCLUDE_DIRS)
-find_package_message(PYTHON
- "Found PythonLibs: ${PYTHON_LIBRARY}"
- "${PYTHON_EXECUTABLE}${PYTHON_VERSION}")
-
-
-# PYTHON_ADD_MODULE(<name> src1 src2 ... srcN) is used to build modules for python.
-FUNCTION(PYTHON_ADD_MODULE _NAME )
- GET_PROPERTY(_TARGET_SUPPORTS_SHARED_LIBS
- GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
- OPTION(PYTHON_ENABLE_MODULE_${_NAME} "Add module ${_NAME}" TRUE)
- OPTION(PYTHON_MODULE_${_NAME}_BUILD_SHARED
- "Add module ${_NAME} shared" ${_TARGET_SUPPORTS_SHARED_LIBS})
-
- # Mark these options as advanced
- MARK_AS_ADVANCED(PYTHON_ENABLE_MODULE_${_NAME}
- PYTHON_MODULE_${_NAME}_BUILD_SHARED)
-
- IF(PYTHON_ENABLE_MODULE_${_NAME})
- IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
- SET(PY_MODULE_TYPE MODULE)
- ELSE(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
- SET(PY_MODULE_TYPE STATIC)
- SET_PROPERTY(GLOBAL APPEND PROPERTY PY_STATIC_MODULES_LIST ${_NAME})
- ENDIF(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
-
- SET_PROPERTY(GLOBAL APPEND PROPERTY PY_MODULES_LIST ${_NAME})
- ADD_LIBRARY(${_NAME} ${PY_MODULE_TYPE} ${ARGN})
- IF(APPLE)
- # On OS X, linking against the Python libraries causes
- # segfaults, so do this dynamic lookup instead.
- SET_TARGET_PROPERTIES(${_NAME} PROPERTIES LINK_FLAGS
- "-undefined dynamic_lookup")
- ELSE()
- # In general, we should not link against libpython as we do not embed
- # the Python interpreter. The python binary itself can then define where
-# the symbols should be loaded from.
- SET_TARGET_PROPERTIES(${_NAME} PROPERTIES LINK_FLAGS
- "-Wl,-undefined,dynamic_lookup")
- ENDIF()
- IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
- SET_TARGET_PROPERTIES(${_NAME} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}")
- SET_TARGET_PROPERTIES(${_NAME} PROPERTIES SUFFIX "${PYTHON_MODULE_EXTENSION}")
- ELSE()
- ENDIF()
-
- ENDIF(PYTHON_ENABLE_MODULE_${_NAME})
-ENDFUNCTION(PYTHON_ADD_MODULE)
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/pyarrow/config.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/config.pyx b/python/pyarrow/config.pyx
index 5ad7cf5..536f278 100644
--- a/python/pyarrow/config.pyx
+++ b/python/pyarrow/config.pyx
@@ -14,21 +14,21 @@
# distutils: language = c++
# cython: embedsignature = True
-cdef extern from 'pyarrow/do_import_numpy.h':
+cdef extern from 'arrow/python/do_import_numpy.h':
pass
-cdef extern from 'pyarrow/numpy_interop.h' namespace 'arrow::py':
+cdef extern from 'arrow/python/numpy_interop.h' namespace 'arrow::py':
int import_numpy()
-cdef extern from 'pyarrow/config.h' namespace 'arrow::py':
- void pyarrow_init()
- void pyarrow_set_numpy_nan(object o)
+cdef extern from 'arrow/python/config.h' namespace 'arrow::py':
+ void Init()
+ void set_numpy_nan(object o)
import_numpy()
-pyarrow_init()
+Init()
import numpy as np
-pyarrow_set_numpy_nan(np.nan)
+set_numpy_nan(np.nan)
import multiprocessing
import os
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
index 3fdbebc..c3fdf4b 100644
--- a/python/pyarrow/includes/pyarrow.pxd
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -25,7 +25,7 @@ from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn,
cimport pyarrow.includes.libarrow_io as arrow_io
-cdef extern from "pyarrow/api.h" namespace "arrow::py" nogil:
+cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
shared_ptr[CDataType] GetPrimitiveType(Type type)
shared_ptr[CDataType] GetTimestampType(TimeUnit unit)
CStatus ConvertPySequence(object obj, CMemoryPool* pool,
@@ -53,13 +53,9 @@ cdef extern from "pyarrow/api.h" namespace "arrow::py" nogil:
void set_default_memory_pool(CMemoryPool* pool)
CMemoryPool* get_memory_pool()
-
-cdef extern from "pyarrow/common.h" namespace "arrow::py" nogil:
cdef cppclass PyBuffer(CBuffer):
PyBuffer(object o)
-
-cdef extern from "pyarrow/io.h" namespace "arrow::py" nogil:
cdef cppclass PyReadableFile(arrow_io.RandomAccessFile):
PyReadableFile(object fo)
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index 9abf985..dae6cb2 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -186,7 +186,7 @@ class build_ext(_build_ext):
# a bit hacky
build_lib = saved_cwd
- # Move the built libpyarrow library to the place expected by the Python
+ # Move the libraries to the place expected by the Python
# build
shared_library_prefix = 'lib'
if sys.platform == 'darwin':
@@ -203,15 +203,16 @@ class build_ext(_build_ext):
pass
def move_lib(lib_name):
- lib_filename = shared_library_prefix + lib_name + shared_library_suffix
+ lib_filename = (shared_library_prefix + lib_name +
+ shared_library_suffix)
shutil.move(pjoin(self.build_type, lib_filename),
pjoin(build_lib, 'pyarrow', lib_filename))
- move_lib("pyarrow")
if self.bundle_arrow_cpp:
move_lib("arrow")
move_lib("arrow_io")
move_lib("arrow_ipc")
+ move_lib("arrow_python")
if self.with_jemalloc:
move_lib("arrow_jemalloc")
if self.with_parquet:
@@ -227,14 +228,14 @@ class build_ext(_build_ext):
if self._failure_permitted(name):
print('Cython module {0} failure permitted'.format(name))
continue
- raise RuntimeError('libpyarrow C-extension failed to build:',
+ raise RuntimeError('pyarrow C-extension failed to build:',
os.path.abspath(built_path))
ext_path = pjoin(build_lib, self._get_cmake_ext_path(name))
if os.path.exists(ext_path):
os.remove(ext_path)
self.mkpath(os.path.dirname(ext_path))
- print('Moving built libpyarrow C-extension', built_path,
+ print('Moving built C-extension', built_path,
'to build path', ext_path)
shutil.move(self.get_ext_built(name), ext_path)
self._found_names.append(name)
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/src/pyarrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/CMakeLists.txt b/python/src/pyarrow/CMakeLists.txt
deleted file mode 100644
index 9e69718..0000000
--- a/python/src/pyarrow/CMakeLists.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#######################################
-# Unit tests
-#######################################
-
-ADD_PYARROW_TEST(adapters/pandas-test)
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/src/pyarrow/adapters/builtin.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc
deleted file mode 100644
index 06e098a..0000000
--- a/python/src/pyarrow/adapters/builtin.cc
+++ /dev/null
@@ -1,527 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <Python.h>
-#include <datetime.h>
-#include <sstream>
-
-#include "pyarrow/adapters/builtin.h"
-
-#include "arrow/api.h"
-#include "arrow/status.h"
-
-#include "pyarrow/helpers.h"
-#include "pyarrow/util/datetime.h"
-
-namespace arrow {
-namespace py {
-
-static inline bool IsPyInteger(PyObject* obj) {
-#if PYARROW_IS_PY2
- return PyLong_Check(obj) || PyInt_Check(obj);
-#else
- return PyLong_Check(obj);
-#endif
-}
-
-class ScalarVisitor {
- public:
- ScalarVisitor()
- : total_count_(0),
- none_count_(0),
- bool_count_(0),
- int_count_(0),
- date_count_(0),
- timestamp_count_(0),
- float_count_(0),
- binary_count_(0),
- unicode_count_(0) {}
-
- void Visit(PyObject* obj) {
- ++total_count_;
- if (obj == Py_None) {
- ++none_count_;
- } else if (PyBool_Check(obj)) {
- ++bool_count_;
- } else if (PyFloat_Check(obj)) {
- ++float_count_;
- } else if (IsPyInteger(obj)) {
- ++int_count_;
- } else if (PyDate_CheckExact(obj)) {
- ++date_count_;
- } else if (PyDateTime_CheckExact(obj)) {
- ++timestamp_count_;
- } else if (PyBytes_Check(obj)) {
- ++binary_count_;
- } else if (PyUnicode_Check(obj)) {
- ++unicode_count_;
- } else {
- // TODO(wesm): accumulate error information somewhere
- }
- }
-
- std::shared_ptr<DataType> GetType() {
- // TODO(wesm): handling mixed-type cases
- if (float_count_) {
- return float64();
- } else if (int_count_) {
- // TODO(wesm): tighter type later
- return int64();
- } else if (date_count_) {
- return date64();
- } else if (timestamp_count_) {
- return timestamp(TimeUnit::MICRO);
- } else if (bool_count_) {
- return boolean();
- } else if (binary_count_) {
- return binary();
- } else if (unicode_count_) {
- return utf8();
- } else {
- return null();
- }
- }
-
- int64_t total_count() const { return total_count_; }
-
- private:
- int64_t total_count_;
- int64_t none_count_;
- int64_t bool_count_;
- int64_t int_count_;
- int64_t date_count_;
- int64_t timestamp_count_;
- int64_t float_count_;
- int64_t binary_count_;
- int64_t unicode_count_;
-
- // Place to accumulate errors
- // std::vector<Status> errors_;
-};
-
-static constexpr int MAX_NESTING_LEVELS = 32;
-
-class SeqVisitor {
- public:
- SeqVisitor() : max_nesting_level_(0) {
- memset(nesting_histogram_, 0, MAX_NESTING_LEVELS * sizeof(int));
- }
-
- Status Visit(PyObject* obj, int level = 0) {
- Py_ssize_t size = PySequence_Size(obj);
-
- if (level > max_nesting_level_) { max_nesting_level_ = level; }
-
- for (int64_t i = 0; i < size; ++i) {
- // TODO(wesm): Error checking?
- // TODO(wesm): Specialize for PyList_GET_ITEM?
- OwnedRef item_ref(PySequence_GetItem(obj, i));
- PyObject* item = item_ref.obj();
-
- if (PyList_Check(item)) {
- RETURN_NOT_OK(Visit(item, level + 1));
- } else if (PyDict_Check(item)) {
- return Status::NotImplemented("No type inference for dicts");
- } else {
- // We permit nulls at any level of nesting
- if (item == Py_None) {
- // TODO
- } else {
- ++nesting_histogram_[level];
- scalars_.Visit(item);
- }
- }
- }
- return Status::OK();
- }
-
- std::shared_ptr<DataType> GetType() {
- if (scalars_.total_count() == 0) {
- if (max_nesting_level_ == 0) {
- return null();
- } else {
- return nullptr;
- }
- } else {
- std::shared_ptr<DataType> result = scalars_.GetType();
- for (int i = 0; i < max_nesting_level_; ++i) {
- result = std::make_shared<ListType>(result);
- }
- return result;
- }
- }
-
- Status Validate() const {
- if (scalars_.total_count() > 0) {
- if (num_nesting_levels() > 1) {
- return Status::Invalid("Mixed nesting levels not supported");
- } else if (max_observed_level() < max_nesting_level_) {
- return Status::Invalid("Mixed nesting levels not supported");
- }
- }
- return Status::OK();
- }
-
- int max_observed_level() const {
- int result = 0;
- for (int i = 0; i < MAX_NESTING_LEVELS; ++i) {
- if (nesting_histogram_[i] > 0) { result = i; }
- }
- return result;
- }
-
- int num_nesting_levels() const {
- int result = 0;
- for (int i = 0; i < MAX_NESTING_LEVELS; ++i) {
- if (nesting_histogram_[i] > 0) { ++result; }
- }
- return result;
- }
-
- private:
- ScalarVisitor scalars_;
-
- // Track observed
- int max_nesting_level_;
- int nesting_histogram_[MAX_NESTING_LEVELS];
-};
-
-// Non-exhaustive type inference
-Status InferArrowType(PyObject* obj, int64_t* size, std::shared_ptr<DataType>* out_type) {
- *size = PySequence_Size(obj);
- if (PyErr_Occurred()) {
- // Not a sequence
- PyErr_Clear();
- return Status::TypeError("Object is not a sequence");
- }
-
- // For 0-length sequences, refuse to guess
- if (*size == 0) { *out_type = null(); }
-
- SeqVisitor seq_visitor;
- RETURN_NOT_OK(seq_visitor.Visit(obj));
- RETURN_NOT_OK(seq_visitor.Validate());
-
- *out_type = seq_visitor.GetType();
-
- if (*out_type == nullptr) { return Status::TypeError("Unable to determine data type"); }
-
- return Status::OK();
-}
-
-// Marshal Python sequence (list, tuple, etc.) to Arrow array
-class SeqConverter {
- public:
- virtual Status Init(const std::shared_ptr<ArrayBuilder>& builder) {
- builder_ = builder;
- return Status::OK();
- }
-
- virtual Status AppendData(PyObject* seq) = 0;
-
- protected:
- std::shared_ptr<ArrayBuilder> builder_;
-};
-
-template <typename BuilderType>
-class TypedConverter : public SeqConverter {
- public:
- Status Init(const std::shared_ptr<ArrayBuilder>& builder) override {
- builder_ = builder;
- typed_builder_ = static_cast<BuilderType*>(builder.get());
- return Status::OK();
- }
-
- protected:
- BuilderType* typed_builder_;
-};
-
-class BoolConverter : public TypedConverter<BooleanBuilder> {
- public:
- Status AppendData(PyObject* seq) override {
- Py_ssize_t size = PySequence_Size(seq);
- RETURN_NOT_OK(typed_builder_->Reserve(size));
- for (int64_t i = 0; i < size; ++i) {
- OwnedRef item(PySequence_GetItem(seq, i));
- if (item.obj() == Py_None) {
- typed_builder_->AppendNull();
- } else {
- if (item.obj() == Py_True) {
- typed_builder_->Append(true);
- } else {
- typed_builder_->Append(false);
- }
- }
- }
- return Status::OK();
- }
-};
-
-class Int64Converter : public TypedConverter<Int64Builder> {
- public:
- Status AppendData(PyObject* seq) override {
- int64_t val;
- Py_ssize_t size = PySequence_Size(seq);
- RETURN_NOT_OK(typed_builder_->Reserve(size));
- for (int64_t i = 0; i < size; ++i) {
- OwnedRef item(PySequence_GetItem(seq, i));
- if (item.obj() == Py_None) {
- typed_builder_->AppendNull();
- } else {
- val = PyLong_AsLongLong(item.obj());
- RETURN_IF_PYERROR();
- typed_builder_->Append(val);
- }
- }
- return Status::OK();
- }
-};
-
-class DateConverter : public TypedConverter<Date64Builder> {
- public:
- Status AppendData(PyObject* seq) override {
- Py_ssize_t size = PySequence_Size(seq);
- RETURN_NOT_OK(typed_builder_->Reserve(size));
- for (int64_t i = 0; i < size; ++i) {
- OwnedRef item(PySequence_GetItem(seq, i));
- if (item.obj() == Py_None) {
- typed_builder_->AppendNull();
- } else {
- PyDateTime_Date* pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
- typed_builder_->Append(PyDate_to_ms(pydate));
- }
- }
- return Status::OK();
- }
-};
-
-class TimestampConverter : public TypedConverter<TimestampBuilder> {
- public:
- Status AppendData(PyObject* seq) override {
- Py_ssize_t size = PySequence_Size(seq);
- RETURN_NOT_OK(typed_builder_->Reserve(size));
- for (int64_t i = 0; i < size; ++i) {
- OwnedRef item(PySequence_GetItem(seq, i));
- if (item.obj() == Py_None) {
- typed_builder_->AppendNull();
- } else {
- PyDateTime_DateTime* pydatetime =
- reinterpret_cast<PyDateTime_DateTime*>(item.obj());
- struct tm datetime = {0};
- datetime.tm_year = PyDateTime_GET_YEAR(pydatetime) - 1900;
- datetime.tm_mon = PyDateTime_GET_MONTH(pydatetime) - 1;
- datetime.tm_mday = PyDateTime_GET_DAY(pydatetime);
- datetime.tm_hour = PyDateTime_DATE_GET_HOUR(pydatetime);
- datetime.tm_min = PyDateTime_DATE_GET_MINUTE(pydatetime);
- datetime.tm_sec = PyDateTime_DATE_GET_SECOND(pydatetime);
- int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
- RETURN_IF_PYERROR();
- struct tm epoch = {0};
- epoch.tm_year = 70;
- epoch.tm_mday = 1;
- // Microseconds since the epoch
- int64_t val = lrint(difftime(mktime(&datetime), mktime(&epoch))) * 1000000 + us;
- typed_builder_->Append(val);
- }
- }
- return Status::OK();
- }
-};
-
-class DoubleConverter : public TypedConverter<DoubleBuilder> {
- public:
- Status AppendData(PyObject* seq) override {
- double val;
- Py_ssize_t size = PySequence_Size(seq);
- RETURN_NOT_OK(typed_builder_->Reserve(size));
- for (int64_t i = 0; i < size; ++i) {
- OwnedRef item(PySequence_GetItem(seq, i));
- if (item.obj() == Py_None) {
- typed_builder_->AppendNull();
- } else {
- val = PyFloat_AsDouble(item.obj());
- RETURN_IF_PYERROR();
- typed_builder_->Append(val);
- }
- }
- return Status::OK();
- }
-};
-
-class BytesConverter : public TypedConverter<BinaryBuilder> {
- public:
- Status AppendData(PyObject* seq) override {
- PyObject* item;
- PyObject* bytes_obj;
- OwnedRef tmp;
- const char* bytes;
- int64_t length;
- Py_ssize_t size = PySequence_Size(seq);
- for (int64_t i = 0; i < size; ++i) {
- item = PySequence_GetItem(seq, i);
- OwnedRef holder(item);
-
- if (item == Py_None) {
- RETURN_NOT_OK(typed_builder_->AppendNull());
- continue;
- } else if (PyUnicode_Check(item)) {
- tmp.reset(PyUnicode_AsUTF8String(item));
- RETURN_IF_PYERROR();
- bytes_obj = tmp.obj();
- } else if (PyBytes_Check(item)) {
- bytes_obj = item;
- } else {
- return Status::TypeError("Non-string value encountered");
- }
- // No error checking
- length = PyBytes_GET_SIZE(bytes_obj);
- bytes = PyBytes_AS_STRING(bytes_obj);
- RETURN_NOT_OK(typed_builder_->Append(bytes, length));
- }
- return Status::OK();
- }
-};
-
-class UTF8Converter : public TypedConverter<StringBuilder> {
- public:
- Status AppendData(PyObject* seq) override {
- PyObject* item;
- PyObject* bytes_obj;
- OwnedRef tmp;
- const char* bytes;
- int64_t length;
- Py_ssize_t size = PySequence_Size(seq);
- for (int64_t i = 0; i < size; ++i) {
- item = PySequence_GetItem(seq, i);
- OwnedRef holder(item);
-
- if (item == Py_None) {
- RETURN_NOT_OK(typed_builder_->AppendNull());
- continue;
- } else if (!PyUnicode_Check(item)) {
- return Status::TypeError("Non-unicode value encountered");
- }
- tmp.reset(PyUnicode_AsUTF8String(item));
- RETURN_IF_PYERROR();
- bytes_obj = tmp.obj();
-
- // No error checking
- length = PyBytes_GET_SIZE(bytes_obj);
- bytes = PyBytes_AS_STRING(bytes_obj);
- RETURN_NOT_OK(typed_builder_->Append(bytes, length));
- }
- return Status::OK();
- }
-};
-
-class ListConverter : public TypedConverter<ListBuilder> {
- public:
- Status Init(const std::shared_ptr<ArrayBuilder>& builder) override;
-
- Status AppendData(PyObject* seq) override {
- Py_ssize_t size = PySequence_Size(seq);
- for (int64_t i = 0; i < size; ++i) {
- OwnedRef item(PySequence_GetItem(seq, i));
- if (item.obj() == Py_None) {
- RETURN_NOT_OK(typed_builder_->AppendNull());
- } else {
- typed_builder_->Append();
- RETURN_NOT_OK(value_converter_->AppendData(item.obj()));
- }
- }
- return Status::OK();
- }
-
- protected:
- std::shared_ptr<SeqConverter> value_converter_;
-};
-
-// Dynamic constructor for sequence converters
-std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
- switch (type->type) {
- case Type::BOOL:
- return std::make_shared<BoolConverter>();
- case Type::INT64:
- return std::make_shared<Int64Converter>();
- case Type::DATE64:
- return std::make_shared<DateConverter>();
- case Type::TIMESTAMP:
- return std::make_shared<TimestampConverter>();
- case Type::DOUBLE:
- return std::make_shared<DoubleConverter>();
- case Type::BINARY:
- return std::make_shared<BytesConverter>();
- case Type::STRING:
- return std::make_shared<UTF8Converter>();
- case Type::LIST:
- return std::make_shared<ListConverter>();
- case Type::STRUCT:
- default:
- return nullptr;
- break;
- }
-}
-
-Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
- builder_ = builder;
- typed_builder_ = static_cast<ListBuilder*>(builder.get());
-
- value_converter_ =
- GetConverter(static_cast<ListType*>(builder->type().get())->value_type());
- if (value_converter_ == nullptr) {
- return Status::NotImplemented("value type not implemented");
- }
-
- value_converter_->Init(typed_builder_->value_builder());
- return Status::OK();
-}
-
-Status AppendPySequence(PyObject* obj, const std::shared_ptr<DataType>& type,
- const std::shared_ptr<ArrayBuilder>& builder) {
- std::shared_ptr<SeqConverter> converter = GetConverter(type);
- if (converter == nullptr) {
- std::stringstream ss;
- ss << "No type converter implemented for " << type->ToString();
- return Status::NotImplemented(ss.str());
- }
- converter->Init(builder);
-
- return converter->AppendData(obj);
-}
-
-Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out) {
- std::shared_ptr<DataType> type;
- int64_t size;
- PyDateTime_IMPORT;
- RETURN_NOT_OK(InferArrowType(obj, &size, &type));
-
- // Handle NA / NullType case
- if (type->type == Type::NA) {
- out->reset(new NullArray(size));
- return Status::OK();
- }
-
- // Give the sequence converter an array builder
- std::shared_ptr<ArrayBuilder> builder;
- RETURN_NOT_OK(MakeBuilder(pool, type, &builder));
- RETURN_NOT_OK(AppendPySequence(obj, type, builder));
-
- return builder->Finish(out);
-}
-
-} // namespace py
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/src/pyarrow/adapters/builtin.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.h b/python/src/pyarrow/adapters/builtin.h
deleted file mode 100644
index 2d45e67..0000000
--- a/python/src/pyarrow/adapters/builtin.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// Functions for converting between CPython built-in data structures and Arrow
-// data structures
-
-#ifndef PYARROW_ADAPTERS_BUILTIN_H
-#define PYARROW_ADAPTERS_BUILTIN_H
-
-#include <Python.h>
-
-#include <memory>
-
-#include <arrow/type.h>
-
-#include "arrow/util/visibility.h"
-
-#include "pyarrow/common.h"
-
-namespace arrow {
-
-class Array;
-class Status;
-
-namespace py {
-
-ARROW_EXPORT arrow::Status InferArrowType(
- PyObject* obj, int64_t* size, std::shared_ptr<arrow::DataType>* out_type);
-
-ARROW_EXPORT arrow::Status AppendPySequence(PyObject* obj,
- const std::shared_ptr<arrow::DataType>& type,
- const std::shared_ptr<arrow::ArrayBuilder>& builder);
-
-ARROW_EXPORT
-Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out);
-
-} // namespace py
-} // namespace arrow
-
-#endif // PYARROW_ADAPTERS_BUILTIN_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/src/pyarrow/adapters/pandas-test.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas-test.cc b/python/src/pyarrow/adapters/pandas-test.cc
deleted file mode 100644
index e694e79..0000000
--- a/python/src/pyarrow/adapters/pandas-test.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "gtest/gtest.h"
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/builder.h"
-#include "arrow/schema.h"
-#include "arrow/table.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "pyarrow/adapters/pandas.h"
-
-namespace arrow {
-namespace py {
-
-TEST(PandasConversionTest, TestObjectBlockWriteFails) {
- StringBuilder builder;
- const char value[] = {'\xf1', '\0'};
-
- for (int i = 0; i < 1000; ++i) {
- builder.Append(value, strlen(value));
- }
-
- std::shared_ptr<Array> arr;
- ASSERT_OK(builder.Finish(&arr));
-
- auto f1 = field("f1", utf8());
- auto f2 = field("f2", utf8());
- auto f3 = field("f3", utf8());
- std::vector<std::shared_ptr<Field>> fields = {f1, f2, f3};
- std::vector<std::shared_ptr<Column>> cols = {std::make_shared<Column>(f1, arr),
- std::make_shared<Column>(f2, arr), std::make_shared<Column>(f3, arr)};
-
- auto schema = std::make_shared<Schema>(fields);
- auto table = std::make_shared<Table>("", schema, cols);
-
- PyObject* out;
- Py_BEGIN_ALLOW_THREADS;
- ASSERT_RAISES(UnknownError, ConvertTableToPandas(table, 2, &out));
- Py_END_ALLOW_THREADS;
-}
-
-} // namespace py
-} // namespace arrow