You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/26 15:45:47 UTC

[5/5] arrow git commit: ARROW-341: [Python] Move pyarrow's C++ code to the main C++ source tree, install libarrow_python and headers

ARROW-341: [Python] Move pyarrow's C++ code to the main C++ source tree, install libarrow_python and headers

This will enable third parties to link to `libarrow_python`.

For now, the pyarrow build system continues to use CMake -- for the purpose of resolving the thirdparty toolchain we may or may not want to go completely to distutils, but we can sort that out later.

Author: Wes McKinney <we...@twosigma.com>

Closes #440 from wesm/ARROW-341 and squashes the following commits:

193bc51 [Wes McKinney] Ensure that '-undefined dynamic_lookup' is passed when linking shared library on OS X
a93496b [Wes McKinney] Add missing backslash
7620f50 [Wes McKinney] Fix cpplint issues
0617c69 [Wes McKinney] Fix LD_LIBRARY_PATH, ARROW_HOME
090c78c [Wes McKinney] Build Arrow library stack specific to active Python version
10e4626 [Wes McKinney] Get Python test suite passing again
cfb7f44 [Wes McKinney] Remove print statement
c1e63dc [Wes McKinney] Scrubbing python/CMakeLists.txt
b80b153 [Wes McKinney] Cleanup, build pandas-test within main test suite
7ef1f81 [Wes McKinney] Start moving python/src/pyarrow tp cpp/src/arrow/python


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3aac4ade
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3aac4ade
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3aac4ade

Branch: refs/heads/master
Commit: 3aac4adef11345f211e4c66467ff758cbc397e43
Parents: 6d4e862
Author: Wes McKinney <we...@twosigma.com>
Authored: Sun Mar 26 11:45:38 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sun Mar 26 11:45:38 2017 -0400

----------------------------------------------------------------------
 ci/travis_script_python.sh                   |   26 +-
 cpp/CMakeLists.txt                           |  115 +-
 cpp/cmake_modules/BuildUtils.cmake           |   88 +-
 cpp/cmake_modules/FindNumPy.cmake            |  100 ++
 cpp/cmake_modules/FindPythonLibsNew.cmake    |  241 +++
 cpp/src/arrow/python/CMakeLists.txt          |   93 +
 cpp/src/arrow/python/api.h                   |   27 +
 cpp/src/arrow/python/builtin_convert.cc      |  527 ++++++
 cpp/src/arrow/python/builtin_convert.h       |   54 +
 cpp/src/arrow/python/common.cc               |   68 +
 cpp/src/arrow/python/common.h                |  139 ++
 cpp/src/arrow/python/config.cc               |   35 +
 cpp/src/arrow/python/config.h                |   45 +
 cpp/src/arrow/python/do_import_numpy.h       |   21 +
 cpp/src/arrow/python/helpers.cc              |   55 +
 cpp/src/arrow/python/helpers.h               |   35 +
 cpp/src/arrow/python/io.cc                   |  222 +++
 cpp/src/arrow/python/io.h                    |   99 ++
 cpp/src/arrow/python/numpy_interop.h         |   60 +
 cpp/src/arrow/python/pandas-test.cc          |   64 +
 cpp/src/arrow/python/pandas_convert.cc       | 1936 +++++++++++++++++++++
 cpp/src/arrow/python/pandas_convert.h        |   79 +
 cpp/src/arrow/python/type_traits.h           |  213 +++
 cpp/src/arrow/python/util/CMakeLists.txt     |   39 +
 cpp/src/arrow/python/util/datetime.h         |   42 +
 cpp/src/arrow/python/util/test_main.cc       |   36 +
 python/CMakeLists.txt                        |  215 +--
 python/cmake_modules/FindArrow.cmake         |    9 +
 python/cmake_modules/FindNumPy.cmake         |  100 --
 python/cmake_modules/FindPythonLibsNew.cmake |  241 ---
 python/pyarrow/config.pyx                    |   14 +-
 python/pyarrow/includes/pyarrow.pxd          |    6 +-
 python/setup.py                              |   11 +-
 python/src/pyarrow/CMakeLists.txt            |   22 -
 python/src/pyarrow/adapters/builtin.cc       |  527 ------
 python/src/pyarrow/adapters/builtin.h        |   54 -
 python/src/pyarrow/adapters/pandas-test.cc   |   64 -
 python/src/pyarrow/adapters/pandas.cc        | 1936 ---------------------
 python/src/pyarrow/adapters/pandas.h         |   79 -
 python/src/pyarrow/api.h                     |   26 -
 python/src/pyarrow/common.cc                 |   69 -
 python/src/pyarrow/common.h                  |  137 --
 python/src/pyarrow/config.cc                 |   35 -
 python/src/pyarrow/config.h                  |   46 -
 python/src/pyarrow/do_import_numpy.h         |   21 -
 python/src/pyarrow/helpers.cc                |   55 -
 python/src/pyarrow/helpers.h                 |   35 -
 python/src/pyarrow/io.cc                     |  221 ---
 python/src/pyarrow/io.h                      |   99 --
 python/src/pyarrow/numpy_interop.h           |   60 -
 python/src/pyarrow/type_traits.h             |  212 ---
 python/src/pyarrow/util/CMakeLists.txt       |   39 -
 python/src/pyarrow/util/datetime.h           |   42 -
 python/src/pyarrow/util/test_main.cc         |   36 -
 54 files changed, 4409 insertions(+), 4461 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/ci/travis_script_python.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 6f4b8e9..df11209 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -23,7 +23,6 @@ export MINICONDA=$HOME/miniconda
 export PATH="$MINICONDA/bin:$PATH"
 
 export ARROW_HOME=$ARROW_CPP_INSTALL
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ARROW_CPP_INSTALL/lib
 
 pushd $PYTHON_DIR
 export PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env
@@ -70,11 +69,31 @@ build_parquet_cpp() {
 
 build_parquet_cpp
 
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PARQUET_HOME/lib
+function build_arrow_libraries() {
+  # Usage: build_arrow_libraries <build-dir> <install-prefix>
+  CPP_BUILD_DIR=$1
+  CPP_DIR=$TRAVIS_BUILD_DIR/cpp
+  # -p: do not fail when the build directory is left over from an earlier run
+  mkdir -p "$CPP_BUILD_DIR"
+  pushd "$CPP_BUILD_DIR"
+
+  cmake -DARROW_BUILD_TESTS=off \
+        -DARROW_PYTHON=on \
+        -DCMAKE_INSTALL_PREFIX="$2" \
+        "$CPP_DIR"
+
+  make -j4
+  make install
+  popd
+}
 
 python_version_tests() {
   PYTHON_VERSION=$1
   CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION
+
+  export ARROW_HOME=$TRAVIS_BUILD_DIR/arrow-install-$PYTHON_VERSION
+  export LD_LIBRARY_PATH=$ARROW_HOME/lib:$PARQUET_HOME/lib
+
   conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION
   source activate $CONDA_ENV_DIR
 
@@ -87,6 +106,9 @@ python_version_tests() {
   # Expensive dependencies install from Continuum package repo
   conda install -y pip numpy pandas cython
 
+  # Build C++ libraries
+  build_arrow_libraries arrow-build-$PYTHON_VERSION $ARROW_HOME
+
   # Other stuff pip install
   pip install -r requirements.txt
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c04afe4..c77cf60 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -106,6 +106,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
     "Rely on boost shared libraries where relevant"
     ON)
 
+  option(ARROW_PYTHON
+    "Build the Arrow CPython extensions"
+    OFF)
+
   option(ARROW_SSE3
     "Build Arrow with SSE3"
     ON)
@@ -133,6 +137,7 @@ if(NOT ARROW_BUILD_BENCHMARKS)
   set(NO_BENCHMARKS 1)
 endif()
 
+include(BuildUtils)
 
 ############################################################
 # Compiler flags
@@ -303,6 +308,14 @@ endfunction()
 #
 # Arguments after the test name will be passed to set_tests_properties().
 function(ADD_ARROW_TEST REL_TEST_NAME)
+  # Keyword arguments accepted after the test name. STATIC_LINK_LIBS, when
+  # given, replaces the default link libraries of the test executable.
+  set(options)
+  # Must carry the same name that is expanded in cmake_parse_arguments below
+  set(one_value_args)
+  set(multi_value_args STATIC_LINK_LIBS)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
   if(NO_TESTS OR NOT ARROW_BUILD_STATIC)
     return()
   endif()
@@ -312,7 +325,13 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
     # This test has a corresponding .cc file, set it up as an executable.
     set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}")
     add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc")
-    target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS})
+
+    if (ARG_STATIC_LINK_LIBS)
+      # Customize link libraries
+      target_link_libraries(${TEST_NAME} ${ARG_STATIC_LINK_LIBS})
+    else()
+      target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS})
+    endif()
     add_dependencies(unittest ${TEST_NAME})
   else()
     # No executable, just invoke the test (probably a script) directly.
@@ -332,10 +351,6 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
       ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
   endif()
   set_tests_properties(${TEST_NAME} PROPERTIES LABELS "unittest")
-
-  if(ARGN)
-    set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
-  endif()
 endfunction()
 
 # A wrapper for add_dependencies() that is compatible with NO_TESTS.
@@ -363,72 +378,6 @@ enable_testing()
 ############################################################
 # Dependencies
 ############################################################
-function(ADD_THIRDPARTY_LIB LIB_NAME)
-  set(options)
-  set(one_value_args SHARED_LIB STATIC_LIB)
-  set(multi_value_args DEPS)
-  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
-  if(ARG_UNPARSED_ARGUMENTS)
-    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
-  endif()
-
-  if(ARG_STATIC_LIB AND ARG_SHARED_LIB)
-    if(NOT ARG_STATIC_LIB)
-      message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
-    endif()
-
-    SET(AUG_LIB_NAME "${LIB_NAME}_static")
-    add_library(${AUG_LIB_NAME} STATIC IMPORTED)
-    set_target_properties(${AUG_LIB_NAME}
-      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
-    message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
-
-    SET(AUG_LIB_NAME "${LIB_NAME}_shared")
-    add_library(${AUG_LIB_NAME} SHARED IMPORTED)
-
-    if(MSVC)
-        # Mark the \u201d.lib\u201d location as part of a Windows DLL
-        set_target_properties(${AUG_LIB_NAME}
-            PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
-    else()
-        set_target_properties(${AUG_LIB_NAME}
-            PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
-    endif()
-    message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
-  elseif(ARG_STATIC_LIB)
-    add_library(${LIB_NAME} STATIC IMPORTED)
-    set_target_properties(${LIB_NAME}
-      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
-    SET(AUG_LIB_NAME "${LIB_NAME}_static")
-    add_library(${AUG_LIB_NAME} STATIC IMPORTED)
-    set_target_properties(${AUG_LIB_NAME}
-      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
-    message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
-  elseif(ARG_SHARED_LIB)
-    add_library(${LIB_NAME} SHARED IMPORTED)
-    set_target_properties(${LIB_NAME}
-      PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
-    SET(AUG_LIB_NAME "${LIB_NAME}_shared")
-    add_library(${AUG_LIB_NAME} SHARED IMPORTED)
-
-    if(MSVC)
-        # Mark the \u201d.lib\u201d location as part of a Windows DLL
-        set_target_properties(${AUG_LIB_NAME}
-            PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
-    else()
-        set_target_properties(${AUG_LIB_NAME}
-            PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
-    endif()
-    message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
-  else()
-    message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
-  endif()
-
-  if(ARG_DEPS)
-    set_target_properties(${LIB_NAME}
-      PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
-  endif()
-endfunction()
 
 # ----------------------------------------------------------------------
 # Add Boost dependencies (code adapted from Apache Kudu (incubating))
@@ -798,8 +747,7 @@ if (${CLANG_FORMAT_FOUND})
   add_custom_target(format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 1
     `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h |
     sed -e '/_generated/g' |
-    sed -e '/windows_compatibility.h/g'`
-    `find ${CMAKE_CURRENT_SOURCE_DIR}/../python -name \\*.cc -or -name \\*.h`)
+    sed -e '/windows_compatibility.h/g'`)
 
   # runs clang format and exits with a non-zero exit code if any files need to be reformatted
   add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 0
@@ -857,11 +805,9 @@ if(NOT APPLE)
   set(ARROW_SHARED_LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/arrow/symbols.map")
 endif()
 
-include(BuildUtils)
-
 ADD_ARROW_LIB(arrow
-    SOURCES ${ARROW_SRCS}
-    SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS}
+  SOURCES ${ARROW_SRCS}
+  SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS}
 )
 
 add_subdirectory(src/arrow)
@@ -875,6 +821,10 @@ endif()
 #----------------------------------------------------------------------
 # IPC library
 
+if(ARROW_PYTHON)
+  set(ARROW_IPC on)
+endif()
+
 ## Flatbuffers
 if(ARROW_IPC)
   if("$ENV{FLATBUFFERS_HOME}" STREQUAL "")
@@ -908,3 +858,14 @@ if(ARROW_IPC)
 
   add_subdirectory(src/arrow/ipc)
 endif()
+
+if(ARROW_PYTHON)
+  find_package(PythonLibsNew REQUIRED)
+  find_package(NumPy REQUIRED)
+
+  include_directories(SYSTEM
+    ${NUMPY_INCLUDE_DIRS}
+    ${PYTHON_INCLUDE_DIRS})
+
+  add_subdirectory(src/arrow/python)
+endif()

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/cmake_modules/BuildUtils.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake
index 78b514c..c993041 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -15,6 +15,73 @@
 # specific language governing permissions and limitations
 # under the License.
 
+# Declare IMPORTED target(s) for a prebuilt third-party library.
+#
+#   ADD_THIRDPARTY_LIB(<name> [STATIC_LIB <path>] [SHARED_LIB <path>] [DEPS <lib>...])
+#
+# Both STATIC_LIB and SHARED_LIB: creates <name>_static and <name>_shared.
+# Only one of them: creates <name> plus the matching suffixed target.
+# Neither: fatal error. DEPS is stored as IMPORTED_LINK_INTERFACE_LIBRARIES.
+function(ADD_THIRDPARTY_LIB LIB_NAME)
+  set(options)
+  set(one_value_args SHARED_LIB STATIC_LIB)
+  set(multi_value_args DEPS)
+  cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN})
+  if(ARG_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
+  endif()
+
+  # Targets that receive DEPS: the bare ${LIB_NAME} target unless overridden below.
+  set(DEPS_TARGETS ${LIB_NAME})
+
+  if(ARG_STATIC_LIB AND ARG_SHARED_LIB)
+    # Only the suffixed targets are created in this mode, so DEPS has to be
+    # attached to them; there is no bare ${LIB_NAME} target to attach it to.
+    set(DEPS_TARGETS ${LIB_NAME}_static ${LIB_NAME}_shared)
+
+    SET(AUG_LIB_NAME "${LIB_NAME}_static")
+    add_library(${AUG_LIB_NAME} STATIC IMPORTED)
+    set_target_properties(${AUG_LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
+
+    SET(AUG_LIB_NAME "${LIB_NAME}_shared")
+    add_library(${AUG_LIB_NAME} SHARED IMPORTED)
+
+    if(MSVC)
+        # Mark the ".lib" location as part of a Windows DLL
+        set_target_properties(${AUG_LIB_NAME}
+            PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
+    else()
+        set_target_properties(${AUG_LIB_NAME}
+            PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+    endif()
+    message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
+  elseif(ARG_STATIC_LIB)
+    add_library(${LIB_NAME} STATIC IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    SET(AUG_LIB_NAME "${LIB_NAME}_static")
+    add_library(${AUG_LIB_NAME} STATIC IMPORTED)
+    set_target_properties(${AUG_LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}")
+    message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}")
+  elseif(ARG_SHARED_LIB)
+    add_library(${LIB_NAME} SHARED IMPORTED)
+    set_target_properties(${LIB_NAME}
+      PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+    SET(AUG_LIB_NAME "${LIB_NAME}_shared")
+    add_library(${AUG_LIB_NAME} SHARED IMPORTED)
+
+    if(MSVC)
+        # Mark the ".lib" location as part of a Windows DLL
+        set_target_properties(${AUG_LIB_NAME}
+            PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}")
+    else()
+        set_target_properties(${AUG_LIB_NAME}
+            PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}")
+    endif()
+    message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}")
+  else()
+    message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}")
+  endif()
+
+  if(ARG_DEPS)
+    set_target_properties(${DEPS_TARGETS}
+      PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}")
+  endif()
+endfunction()
+
 function(ADD_ARROW_LIB LIB_NAME)
   set(options)
   set(one_value_args SHARED_LINK_FLAGS)
@@ -45,9 +112,16 @@ function(ADD_ARROW_LIB LIB_NAME)
 
   if (ARROW_BUILD_SHARED)
     add_library(${LIB_NAME}_shared SHARED $<TARGET_OBJECTS:${LIB_NAME}_objlib>)
+
     if(APPLE)
-      set_target_properties(${LIB_NAME}_shared PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+      # On OS X, you can avoid linking at library load time and instead
+      # expect that the symbols have been loaded separately. This happens
+      # with libpython* where there can be conflicts between system Python and
+      # the Python from a thirdparty distribution.
+      set(ARG_SHARED_LINK_FLAGS
+        "-undefined dynamic_lookup ${ARG_SHARED_LINK_FLAGS}")
     endif()
+
     set_target_properties(${LIB_NAME}_shared
       PROPERTIES
       LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}"
@@ -55,6 +129,7 @@ function(ADD_ARROW_LIB LIB_NAME)
       OUTPUT_NAME ${LIB_NAME}
       VERSION "${ARROW_ABI_VERSION}"
       SOVERSION "${ARROW_SO_VERSION}")
+
     target_link_libraries(${LIB_NAME}_shared
       LINK_PUBLIC ${ARG_SHARED_LINK_LIBS}
       LINK_PRIVATE ${ARG_SHARED_PRIVATE_LINK_LIBS})
@@ -68,28 +143,28 @@ function(ADD_ARROW_LIB LIB_NAME)
         set_target_properties(${LIB_NAME}_shared PROPERTIES
             INSTALL_RPATH ${_lib_install_rpath})
     endif()
-  
+
     install(TARGETS ${LIB_NAME}_shared
       LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
       ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
   endif()
-  
+
   if (ARROW_BUILD_STATIC)
       add_library(${LIB_NAME}_static STATIC $<TARGET_OBJECTS:${LIB_NAME}_objlib>)
     set_target_properties(${LIB_NAME}_static
       PROPERTIES
       LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}"
       OUTPUT_NAME ${LIB_NAME})
-  
+
   target_link_libraries(${LIB_NAME}_static
       LINK_PUBLIC ${ARG_STATIC_LINK_LIBS}
       LINK_PRIVATE ${ARG_STATIC_PRIVATE_LINK_LIBS})
-  
+
   install(TARGETS ${LIB_NAME}_static
       LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
       ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
   endif()
-  
+
   if (APPLE)
       set_target_properties(${LIB_NAME}_shared
       PROPERTIES
@@ -98,4 +173,3 @@ function(ADD_ARROW_LIB LIB_NAME)
   endif()
 
 endfunction()
-

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/cmake_modules/FindNumPy.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindNumPy.cmake b/cpp/cmake_modules/FindNumPy.cmake
new file mode 100644
index 0000000..58bb531
--- /dev/null
+++ b/cpp/cmake_modules/FindNumPy.cmake
@@ -0,0 +1,100 @@
+# - Find the NumPy libraries
+# This module finds if NumPy is installed, and sets the following variables
+# indicating where it is.
+#
+# TODO: Update to provide the libraries and paths for linking npymath lib.
+#
+#  NUMPY_FOUND               - was NumPy found
+#  NUMPY_VERSION             - the version of NumPy found as a string
+#  NUMPY_VERSION_MAJOR       - the major version number of NumPy
+#  NUMPY_VERSION_MINOR       - the minor version number of NumPy
+#  NUMPY_VERSION_PATCH       - the patch version number of NumPy
+#  NUMPY_VERSION_DECIMAL     - e.g. version 1.6.1 is 10601
+#  NUMPY_INCLUDE_DIRS        - path to the NumPy include files
+
+#============================================================================
+# Copyright 2012 Continuum Analytics, Inc.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#============================================================================
+
+# Finding NumPy involves calling the Python interpreter
+if(NumPy_FIND_REQUIRED)
+    find_package(PythonInterp REQUIRED)
+else()
+    find_package(PythonInterp)
+endif()
+
+if(NOT PYTHONINTERP_FOUND)
+    set(NUMPY_FOUND FALSE)
+    return()
+endif()
+
+execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+    "import numpy as n; print(n.__version__); print(n.get_include());"
+    RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS
+    OUTPUT_VARIABLE _NUMPY_VALUES_OUTPUT
+    ERROR_VARIABLE _NUMPY_ERROR_VALUE
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# The probe succeeded only if its exit status is exactly 0. (MATCHES is a
+# regex search and would also accept 10, or an error string containing a "0".)
+if(NOT _NUMPY_SEARCH_SUCCESS EQUAL 0)
+    if(NumPy_FIND_REQUIRED)
+        message(FATAL_ERROR
+            "NumPy import failure:\n${_NUMPY_ERROR_VALUE}")
+    endif()
+    set(NUMPY_FOUND FALSE)
+    return()
+endif()
+
+# Convert the process output into a list
+string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES ${_NUMPY_VALUES_OUTPUT})
+string(REGEX REPLACE "\n" ";" _NUMPY_VALUES ${_NUMPY_VALUES})
+list(GET _NUMPY_VALUES 0 NUMPY_VERSION)
+list(GET _NUMPY_VALUES 1 NUMPY_INCLUDE_DIRS)
+
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" _VER_CHECK "${NUMPY_VERSION}")
+if("${_VER_CHECK}" STREQUAL "")
+    # The output from Python was unexpected. Raise an error always
+    # here, because we found NumPy, but it appears to be corrupted somehow.
+    message(FATAL_ERROR
+        "Requested version and include path from NumPy, got instead:\n${_NUMPY_VALUES_OUTPUT}\n")
+    return()
+endif()
+
+# Make sure all directory separators are '/'
+string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS ${NUMPY_INCLUDE_DIRS})
+
+# Get the major and minor version numbers
+string(REGEX REPLACE "\\." ";" _NUMPY_VERSION_LIST ${NUMPY_VERSION})
+list(GET _NUMPY_VERSION_LIST 0 NUMPY_VERSION_MAJOR)
+list(GET _NUMPY_VERSION_LIST 1 NUMPY_VERSION_MINOR)
+list(GET _NUMPY_VERSION_LIST 2 NUMPY_VERSION_PATCH)
+string(REGEX MATCH "[0-9]*" NUMPY_VERSION_PATCH ${NUMPY_VERSION_PATCH})
+math(EXPR NUMPY_VERSION_DECIMAL
+    "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
+
+find_package_message(NUMPY
+    "Found NumPy: version \"${NUMPY_VERSION}\" ${NUMPY_INCLUDE_DIRS}"
+    "${NUMPY_INCLUDE_DIRS}${NUMPY_VERSION}")
+
+set(NUMPY_FOUND TRUE)

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/cmake_modules/FindPythonLibsNew.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindPythonLibsNew.cmake b/cpp/cmake_modules/FindPythonLibsNew.cmake
new file mode 100644
index 0000000..1000a95
--- /dev/null
+++ b/cpp/cmake_modules/FindPythonLibsNew.cmake
@@ -0,0 +1,241 @@
+# - Find python libraries
+# This module finds the libraries corresponding to the Python interpreter
+# FindPythonInterp provides.
+# This code sets the following variables:
+#
+#  PYTHONLIBS_FOUND           - have the Python libs been found
+#  PYTHON_PREFIX              - path to the Python installation
+#  PYTHON_LIBRARIES           - path to the python library
+#  PYTHON_INCLUDE_DIRS        - path to where Python.h is found
+#  PYTHON_SITE_PACKAGES       - path to installation site-packages
+#  PYTHON_IS_DEBUG            - whether the Python interpreter is a debug build
+#
+#  PYTHON_INCLUDE_PATH        - path to where Python.h is found (deprecated)
+#
+# A function PYTHON_ADD_MODULE(<name> src1 src2 ... srcN) is defined
+# to build modules for python.
+#
+# Thanks to talljimbo for the patch adding the 'LDVERSION' config
+# variable usage.
+
+#=============================================================================
+# Copyright 2001-2009 Kitware, Inc.
+# Copyright 2012-2014 Continuum Analytics, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the names of Kitware, Inc., the Insight Software Consortium,
+# nor the names of their contributors may be used to endorse or promote
+# products derived from this software without specific prior written
+# permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+#  License text for the above reference.)
+
+# Use the Python interpreter to find the libs.
+if(PythonLibsNew_FIND_REQUIRED)
+    find_package(PythonInterp REQUIRED)
+else()
+    find_package(PythonInterp)
+endif()
+
+if(NOT PYTHONINTERP_FOUND)
+    set(PYTHONLIBS_FOUND FALSE)
+    return()
+endif()
+
+# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter
+# testing whether sys has the gettotalrefcount function is a reliable,
+# cross-platform way to detect a CPython debug interpreter.
+#
+# The library suffix is from the config var LDVERSION sometimes, otherwise
+# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
+#
+# The config var LIBPL is for Linux, and helps on Debian Jessie where the
+# addition of multi-arch support shuffled things around.
+execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+    "from distutils import sysconfig as s;import sys;import struct;
+print('.'.join(str(v) for v in sys.version_info));
+print(sys.prefix);
+print(s.get_python_inc(plat_specific=True));
+print(s.get_python_lib(plat_specific=True));
+print(s.get_config_var('SO'));
+print(hasattr(sys, 'gettotalrefcount')+0);
+print(struct.calcsize('@P'));
+print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
+print(s.get_config_var('LIBPL'));
+"
+    RESULT_VARIABLE _PYTHON_SUCCESS
+    OUTPUT_VARIABLE _PYTHON_VALUES
+    ERROR_VARIABLE _PYTHON_ERROR_VALUE
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# The probe succeeded only if its exit status is exactly 0. (MATCHES is a
+# regex search and would also accept 10, or an error string containing a "0".)
+if(NOT _PYTHON_SUCCESS EQUAL 0)
+    if(PythonLibsNew_FIND_REQUIRED)
+        message(FATAL_ERROR
+            "Python config failure:\n${_PYTHON_ERROR_VALUE}")
+    endif()
+    set(PYTHONLIBS_FOUND FALSE)
+    return()
+endif()
+
+# Convert the process output into a list
+string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
+string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
+list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
+list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
+list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
+list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
+list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
+list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
+list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
+list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
+list(GET _PYTHON_VALUES 8 PYTHON_LIBRARY_PATH)
+
+# Make sure the Python has the same pointer-size as the chosen compiler
+# Skip the check on OS X, it doesn't consistently have CMAKE_SIZEOF_VOID_P defined
+if((NOT APPLE) AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
+    if(PythonLibsNew_FIND_REQUIRED)
+        math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
+        math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
+        message(FATAL_ERROR
+            "Python config failure: Python is ${_PYTHON_BITS}-bit, "
+            "chosen compiler is  ${_CMAKE_BITS}-bit")
+    endif()
+    set(PYTHONLIBS_FOUND FALSE)
+    return()
+endif()
+
+# The built-in FindPython didn't always give the version numbers
+string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST})
+list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR)
+list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR)
+list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH)
+
+# Make sure all directory separators are '/'
+string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
+string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR})
+string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES})
+
+if(CMAKE_HOST_WIN32)
+    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
+        set(PYTHON_LIBRARY
+            "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
+    else()
+        set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/libpython${PYTHON_LIBRARY_SUFFIX}.a")
+    endif()
+elseif(APPLE)
+     # Seems to require "-undefined dynamic_lookup" instead of linking
+     # against the .dylib, otherwise it crashes. This flag is added
+     # below
+    set(PYTHON_LIBRARY "")
+    #set(PYTHON_LIBRARY
+    #    "${PYTHON_PREFIX}/lib/libpython${PYTHON_LIBRARY_SUFFIX}.dylib")
+else()
+    if(${PYTHON_SIZEOF_VOID_P} MATCHES 8)
+        set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib64" "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}")
+    else()
+        set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}")
+    endif()
+    message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}")
+    message(STATUS "Looking for python${PYTHON_LIBRARY_SUFFIX}")
+    # Probably this needs to be more involved. It would be nice if the config
+    # information the python interpreter itself gave us were more complete.
+    find_library(PYTHON_LIBRARY
+        NAMES "python${PYTHON_LIBRARY_SUFFIX}"
+        PATHS ${_PYTHON_LIBS_SEARCH}
+        NO_SYSTEM_ENVIRONMENT_PATH)
+    message(STATUS "Found Python lib ${PYTHON_LIBRARY}")
+endif()
+
+# For backward compatibility, set PYTHON_INCLUDE_PATH, but make it internal.
+SET(PYTHON_INCLUDE_PATH "${PYTHON_INCLUDE_DIR}" CACHE INTERNAL
+          "Path to where Python.h is found (deprecated)")
+
+MARK_AS_ADVANCED(
+  PYTHON_LIBRARY
+  PYTHON_INCLUDE_DIR
+)
+
+# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the
+# cache entries because they are meant to specify the location of a single
+# library. We now set the variables listed by the documentation for this
+# module.
+SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}")
+SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
+SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
+
+
+# Don't know how to get to this directory, just doing something simple :P
+#INCLUDE(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
+#FIND_PACKAGE_HANDLE_STANDARD_ARGS(PythonLibs DEFAULT_MSG PYTHON_LIBRARIES PYTHON_INCLUDE_DIRS)
+find_package_message(PYTHON
+    "Found PythonLibs: ${PYTHON_LIBRARY}"
+    "${PYTHON_EXECUTABLE}${PYTHON_VERSION}")
+
+
+# PYTHON_ADD_MODULE(<name> src1 src2 ... srcN) is used to build modules for python.
+#
+# Defines two advanced cache options per module: PYTHON_ENABLE_MODULE_<name>
+# (default TRUE) and PYTHON_MODULE_<name>_BUILD_SHARED (default taken from the
+# TARGET_SUPPORTS_SHARED_LIBS global property). Enabled modules are appended to
+# the PY_MODULES_LIST global property; static ones also to PY_STATIC_MODULES_LIST.
+FUNCTION(PYTHON_ADD_MODULE _NAME )
+  GET_PROPERTY(_TARGET_SUPPORTS_SHARED_LIBS
+    GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
+  OPTION(PYTHON_ENABLE_MODULE_${_NAME} "Add module ${_NAME}" TRUE)
+  OPTION(PYTHON_MODULE_${_NAME}_BUILD_SHARED
+    "Add module ${_NAME} shared" ${_TARGET_SUPPORTS_SHARED_LIBS})
+
+  # Mark these options as advanced
+  MARK_AS_ADVANCED(PYTHON_ENABLE_MODULE_${_NAME}
+    PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+
+  IF(PYTHON_ENABLE_MODULE_${_NAME})
+    IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+      SET(PY_MODULE_TYPE MODULE)
+    ELSE(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+      SET(PY_MODULE_TYPE STATIC)
+      SET_PROPERTY(GLOBAL  APPEND  PROPERTY  PY_STATIC_MODULES_LIST ${_NAME})
+    ENDIF(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+
+    SET_PROPERTY(GLOBAL  APPEND  PROPERTY  PY_MODULES_LIST ${_NAME})
+    ADD_LIBRARY(${_NAME} ${PY_MODULE_TYPE} ${ARGN})
+    IF(APPLE)
+      # On OS X, linking against the Python libraries causes
+      # segfaults, so do this dynamic lookup instead.
+      SET_TARGET_PROPERTIES(${_NAME} PROPERTIES LINK_FLAGS
+                          "-undefined dynamic_lookup")
+    ELSE()
+     # In general, we should not link against libpython as we do not embed
+     # the Python interpreter. The python binary itself can then define where
+     # the symbols should be loaded from.
+     SET_TARGET_PROPERTIES(${_NAME} PROPERTIES LINK_FLAGS
+         "-Wl,-undefined,dynamic_lookup")
+    ENDIF()
+    # Only shared modules get the Python-specific file name prefix and suffix.
+    IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED)
+      SET_TARGET_PROPERTIES(${_NAME} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}")
+      SET_TARGET_PROPERTIES(${_NAME} PROPERTIES SUFFIX "${PYTHON_MODULE_EXTENSION}")
+    ELSE()
+    ENDIF()
+
+  ENDIF(PYTHON_ENABLE_MODULE_${_NAME})
+ENDFUNCTION(PYTHON_ADD_MODULE)

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
new file mode 100644
index 0000000..03f5afc
--- /dev/null
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+
+#######################################
+# arrow_python
+#######################################
+
+# Static library built from util/test_main.cc that the arrow_python tests
+# link against; it is only defined when tests are being built.
+if (ARROW_BUILD_TESTS)
+  add_library(arrow_python_test_main STATIC util/test_main.cc)
+
+  if (APPLE)
+    target_link_libraries(arrow_python_test_main gtest dl)
+    # Leave undefined symbols to be resolved dynamically at load time.
+    set_target_properties(arrow_python_test_main
+      PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+  else()
+    target_link_libraries(arrow_python_test_main gtest pthread dl)
+  endif()
+endif()
+
+# Libraries that every arrow_python test links against.
+set(ARROW_PYTHON_MIN_TEST_LIBS
+  arrow_python_test_main
+  arrow_python_static
+  arrow_ipc_static
+  arrow_io_static
+  arrow_static)
+
+# Everywhere except OS X, tests also link the Python library itself
+# (PYTHON_LIBRARIES), registered here under the thirdparty name "python".
+if(NOT APPLE AND ARROW_BUILD_TESTS)
+  ADD_THIRDPARTY_LIB(python
+    SHARED_LIB "${PYTHON_LIBRARIES}")
+  list(APPEND ARROW_PYTHON_MIN_TEST_LIBS python)
+endif()
+
+# Final list handed to the test targets below.
+set(ARROW_PYTHON_TEST_LINK_LIBS ${ARROW_PYTHON_MIN_TEST_LIBS})
+
+# ----------------------------------------------------------------------
+
+# Sources compiled into the arrow_python library.
+set(ARROW_PYTHON_SRCS
+  builtin_convert.cc
+  common.cc
+  config.cc
+  helpers.cc
+  io.cc
+  pandas_convert.cc
+)
+
+# Arrow shared libraries passed as SHARED_LINK_LIBS for arrow_python.
+set(ARROW_PYTHON_SHARED_LINK_LIBS
+  arrow_io_shared
+  arrow_ipc_shared
+  arrow_shared
+)
+
+# NOTE(review): STATIC_LINK_LIBS reuses ARROW_IO_SHARED_PRIVATE_LINK_LIBS,
+# which is defined outside this file -- confirm that this is intended.
+ADD_ARROW_LIB(arrow_python
+  SOURCES ${ARROW_PYTHON_SRCS}
+  SHARED_LINK_FLAGS ""
+  SHARED_LINK_LIBS ${ARROW_PYTHON_SHARED_LINK_LIBS}
+  STATIC_LINK_LIBS ${ARROW_IO_SHARED_PRIVATE_LINK_LIBS}
+)
+
+# Public headers, installed under <includedir>/arrow/python so that third
+# parties can compile against libarrow_python.
+install(FILES
+  api.h
+  builtin_convert.h
+  common.h
+  config.h
+  do_import_numpy.h
+  helpers.h
+  io.h
+  numpy_interop.h
+  pandas_convert.h
+  type_traits.h
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/python")
+
+# set_target_properties(arrow_python_shared PROPERTIES
+#     INSTALL_RPATH "\$ORIGIN")
+
+# pandas-test links statically against the test library list assembled above.
+if (ARROW_BUILD_TESTS)
+  ADD_ARROW_TEST(pandas-test
+    STATIC_LINK_LIBS "${ARROW_PYTHON_TEST_LINK_LIBS}")
+endif()

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/api.h b/cpp/src/arrow/python/api.h
new file mode 100644
index 0000000..f4f1c0c
--- /dev/null
+++ b/cpp/src/arrow/python/api.h
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_PYTHON_API_H
+#define ARROW_PYTHON_API_H
+
+#include "arrow/python/builtin_convert.h"
+#include "arrow/python/common.h"
+#include "arrow/python/helpers.h"
+#include "arrow/python/io.h"
+#include "arrow/python/pandas_convert.h"
+
+#endif  // ARROW_PYTHON_API_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
new file mode 100644
index 0000000..9acccc1
--- /dev/null
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -0,0 +1,527 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <Python.h>
+#include <datetime.h>
+#include <sstream>
+
+#include "arrow/python/builtin_convert.h"
+
+#include "arrow/api.h"
+#include "arrow/status.h"
+
+#include "arrow/python/helpers.h"
+#include "arrow/python/util/datetime.h"
+
+namespace arrow {
+namespace py {
+
+// Returns true when obj is a Python integer: a `long` on any Python version
+// and, when built for Python 2, additionally an `int`.
+static inline bool IsPyInteger(PyObject* obj) {
+  bool is_integer = PyLong_Check(obj);
+#if PYARROW_IS_PY2
+  is_integer = is_integer || PyInt_Check(obj);
+#endif
+  return is_integer;
+}
+
+// Tallies the kinds of Python scalars seen in a sequence so that a single
+// Arrow type can be picked for them afterwards.
+class ScalarVisitor {
+ public:
+  ScalarVisitor() = default;
+
+  // Classify one object and bump the matching counter. The checks run top to
+  // bottom and the first match wins; objects matching none of them are only
+  // reflected in the total count.
+  void Visit(PyObject* obj) {
+    ++total_count_;
+    if (obj == Py_None) {
+      ++none_count_;
+      return;
+    }
+    if (PyBool_Check(obj)) {
+      ++bool_count_;
+      return;
+    }
+    if (PyFloat_Check(obj)) {
+      ++float_count_;
+      return;
+    }
+    if (IsPyInteger(obj)) {
+      ++int_count_;
+      return;
+    }
+    if (PyDate_CheckExact(obj)) {
+      ++date_count_;
+      return;
+    }
+    if (PyDateTime_CheckExact(obj)) {
+      ++timestamp_count_;
+      return;
+    }
+    if (PyBytes_Check(obj)) {
+      ++binary_count_;
+      return;
+    }
+    if (PyUnicode_Check(obj)) {
+      ++unicode_count_;
+      return;
+    }
+    // TODO(wesm): accumulate error information somewhere
+  }
+
+  // Pick the Arrow type for the scalars seen so far. Precedence is float,
+  // int, date, timestamp, bool, binary, unicode; with no counted scalars the
+  // null type is returned.
+  std::shared_ptr<DataType> GetType() {
+    // TODO(wesm): handling mixed-type cases
+    if (float_count_) { return float64(); }
+    // TODO(wesm): tighter type later
+    if (int_count_) { return int64(); }
+    if (date_count_) { return date64(); }
+    if (timestamp_count_) { return timestamp(TimeUnit::MICRO); }
+    if (bool_count_) { return boolean(); }
+    if (binary_count_) { return binary(); }
+    if (unicode_count_) { return utf8(); }
+    return null();
+  }
+
+  // Number of objects passed to Visit, whatever their kind.
+  int64_t total_count() const { return total_count_; }
+
+ private:
+  int64_t total_count_ = 0;
+  int64_t none_count_ = 0;
+  int64_t bool_count_ = 0;
+  int64_t int_count_ = 0;
+  int64_t date_count_ = 0;
+  int64_t timestamp_count_ = 0;
+  int64_t float_count_ = 0;
+  int64_t binary_count_ = 0;
+  int64_t unicode_count_ = 0;
+
+  // Place to accumulate errors
+  // std::vector<Status> errors_;
+};
+
+// Number of list nesting levels tracked during type inference (the size of
+// the per-level histogram in SeqVisitor).
+static constexpr int MAX_NESTING_LEVELS = 32;
+
+// Walks a possibly nested Python sequence, recording how deep lists nest and
+// at which levels scalars occur, and feeds every non-None scalar to a
+// ScalarVisitor so that an Arrow type can be inferred.
+class SeqVisitor {
+ public:
+  // Value-initializing the histogram zeroes every level.
+  SeqVisitor() : max_nesting_level_(0), nesting_histogram_() {}
+
+  // Visit every item of obj; `level` is the nesting depth of obj itself.
+  // Returns Invalid when the nesting is deeper than MAX_NESTING_LEVELS,
+  // NotImplemented when a dict is found, and the converted Python error when
+  // an item cannot be read.
+  Status Visit(PyObject* obj, int level = 0) {
+    // nesting_histogram_ has one slot per level; reject deeper input instead
+    // of writing past the end of the array.
+    if (level >= MAX_NESTING_LEVELS) {
+      return Status::Invalid("Sequence nesting is too deep for type inference");
+    }
+
+    Py_ssize_t size = PySequence_Size(obj);
+
+    if (level > max_nesting_level_) { max_nesting_level_ = level; }
+
+    for (int64_t i = 0; i < size; ++i) {
+      // TODO(wesm): Specialize for PyList_GET_ITEM?
+      OwnedRef item_ref(PySequence_GetItem(obj, i));
+      // A failed lookup yields NULL with a Python error set; report it rather
+      // than handing NULL to the type checks below.
+      RETURN_IF_PYERROR();
+      PyObject* item = item_ref.obj();
+
+      if (PyList_Check(item)) {
+        RETURN_NOT_OK(Visit(item, level + 1));
+      } else if (PyDict_Check(item)) {
+        return Status::NotImplemented("No type inference for dicts");
+      } else {
+        // We permit nulls at any level of nesting
+        if (item == Py_None) {
+          // TODO
+        } else {
+          ++nesting_histogram_[level];
+          scalars_.Visit(item);
+        }
+      }
+    }
+    return Status::OK();
+  }
+
+  // Inferred type: the scalar type wrapped in one ListType per nesting level.
+  // With no scalars seen, this is the null type for a flat sequence and
+  // nullptr (undetermined) for a nested one.
+  std::shared_ptr<DataType> GetType() {
+    if (scalars_.total_count() == 0) {
+      if (max_nesting_level_ == 0) {
+        return null();
+      } else {
+        return nullptr;
+      }
+    } else {
+      std::shared_ptr<DataType> result = scalars_.GetType();
+      for (int i = 0; i < max_nesting_level_; ++i) {
+        result = std::make_shared<ListType>(result);
+      }
+      return result;
+    }
+  }
+
+  // Scalars must all sit at one nesting level, and that level must be the
+  // deepest one observed; anything else is reported as Invalid.
+  Status Validate() const {
+    if (scalars_.total_count() > 0) {
+      if (num_nesting_levels() > 1) {
+        return Status::Invalid("Mixed nesting levels not supported");
+      } else if (max_observed_level() < max_nesting_level_) {
+        return Status::Invalid("Mixed nesting levels not supported");
+      }
+    }
+    return Status::OK();
+  }
+
+  // Deepest level at which at least one scalar was counted (0 if none).
+  int max_observed_level() const {
+    int result = 0;
+    for (int i = 0; i < MAX_NESTING_LEVELS; ++i) {
+      if (nesting_histogram_[i] > 0) { result = i; }
+    }
+    return result;
+  }
+
+  // Number of distinct levels at which scalars were counted.
+  int num_nesting_levels() const {
+    int result = 0;
+    for (int i = 0; i < MAX_NESTING_LEVELS; ++i) {
+      if (nesting_histogram_[i] > 0) { ++result; }
+    }
+    return result;
+  }
+
+ private:
+  ScalarVisitor scalars_;
+
+  // Track observed
+  int max_nesting_level_;
+  int nesting_histogram_[MAX_NESTING_LEVELS];
+};
+
+// Non-exhaustive type inference
+//
+// Stores the length of obj in *size and the inferred Arrow type in *out_type.
+// Returns TypeError when obj is not a sequence or when no type can be
+// determined, and forwards any error from visiting or validating the items.
+Status InferArrowType(PyObject* obj, int64_t* size, std::shared_ptr<DataType>* out_type) {
+  *size = PySequence_Size(obj);
+  if (PyErr_Occurred()) {
+    // Not a sequence
+    PyErr_Clear();
+    return Status::TypeError("Object is not a sequence");
+  }
+
+  // For 0-length sequences, refuse to guess: there is nothing to visit, so
+  // report the null type straight away.
+  if (*size == 0) {
+    *out_type = null();
+    return Status::OK();
+  }
+
+  SeqVisitor seq_visitor;
+  RETURN_NOT_OK(seq_visitor.Visit(obj));
+  RETURN_NOT_OK(seq_visitor.Validate());
+
+  *out_type = seq_visitor.GetType();
+
+  if (*out_type == nullptr) { return Status::TypeError("Unable to determine data type"); }
+
+  return Status::OK();
+}
+
+// Marshal Python sequence (list, tuple, etc.) to Arrow array
+//
+// Abstract base: Init binds the converter to a builder and AppendData
+// appends the items of one Python sequence to that builder.
+class SeqConverter {
+ public:
+  // Converters are owned and destroyed through base-class pointers.
+  virtual ~SeqConverter() = default;
+
+  // Remember the builder that AppendData will write into.
+  virtual Status Init(const std::shared_ptr<ArrayBuilder>& builder) {
+    builder_ = builder;
+    return Status::OK();
+  }
+
+  // Append every item of seq to the builder passed to Init.
+  virtual Status AppendData(PyObject* seq) = 0;
+
+ protected:
+  std::shared_ptr<ArrayBuilder> builder_;
+};
+
+// SeqConverter that also keeps the builder as its concrete BuilderType, so
+// subclasses can call the typed Append methods directly.
+template <typename BuilderType>
+class TypedConverter : public SeqConverter {
+ public:
+  // The cast is unchecked: the caller must pass a builder whose dynamic type
+  // is BuilderType.
+  Status Init(const std::shared_ptr<ArrayBuilder>& builder) override {
+    typed_builder_ = static_cast<BuilderType*>(builder.get());
+    return SeqConverter::Init(builder);
+  }
+
+ protected:
+  // Non-owning view of builder_; unset until Init has been called.
+  BuilderType* typed_builder_;
+};
+
+// Appends a Python sequence of booleans to a BooleanBuilder.
+class BoolConverter : public TypedConverter<BooleanBuilder> {
+ public:
+  // None becomes null, Py_True becomes true and every other object becomes
+  // false. Builder failures and Python errors raised while reading an item
+  // are returned to the caller.
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with an error set on failure.
+      RETURN_IF_PYERROR();
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        RETURN_NOT_OK(typed_builder_->Append(item.obj() == Py_True));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Appends a Python sequence of integers to an Int64Builder.
+class Int64Converter : public TypedConverter<Int64Builder> {
+ public:
+  // None becomes null; other items are converted with PyLong_AsLongLong.
+  // Conversion errors, item lookup errors and builder failures are returned
+  // to the caller.
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with an error set on failure.
+      RETURN_IF_PYERROR();
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        const int64_t val = PyLong_AsLongLong(item.obj());
+        RETURN_IF_PYERROR();
+        RETURN_NOT_OK(typed_builder_->Append(val));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Appends a Python sequence of datetime.date objects to a Date64Builder.
+class DateConverter : public TypedConverter<Date64Builder> {
+ public:
+  // None becomes null; other items are reinterpreted as PyDateTime_Date and
+  // converted with PyDate_to_ms (presumably milliseconds since the epoch --
+  // see arrow/python/util/datetime.h). Item lookup errors and builder
+  // failures are returned to the caller.
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with an error set on failure.
+      RETURN_IF_PYERROR();
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        PyDateTime_Date* pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
+        RETURN_NOT_OK(typed_builder_->Append(PyDate_to_ms(pydate)));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Appends a Python sequence of datetime.datetime objects to a
+// TimestampBuilder as microseconds since 1970-01-01.
+class TimestampConverter : public TypedConverter<TimestampBuilder> {
+ public:
+  // None becomes null. Other items are broken down into a struct tm and
+  // turned into a second count with mktime/difftime, to which the
+  // microsecond field is added. Item lookup errors and builder failures are
+  // returned to the caller.
+  // NOTE(review): mktime interprets both values in the process-local time
+  // zone -- confirm this is the intended treatment of naive datetimes.
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+
+    // The reference point does not depend on the item, so compute it once.
+    struct tm epoch = {0};
+    epoch.tm_year = 70;
+    epoch.tm_mday = 1;
+    const auto epoch_time = mktime(&epoch);
+
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with an error set on failure.
+      RETURN_IF_PYERROR();
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        PyDateTime_DateTime* pydatetime =
+            reinterpret_cast<PyDateTime_DateTime*>(item.obj());
+        struct tm datetime = {0};
+        datetime.tm_year = PyDateTime_GET_YEAR(pydatetime) - 1900;
+        datetime.tm_mon = PyDateTime_GET_MONTH(pydatetime) - 1;
+        datetime.tm_mday = PyDateTime_GET_DAY(pydatetime);
+        datetime.tm_hour = PyDateTime_DATE_GET_HOUR(pydatetime);
+        datetime.tm_min = PyDateTime_DATE_GET_MINUTE(pydatetime);
+        datetime.tm_sec = PyDateTime_DATE_GET_SECOND(pydatetime);
+        int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
+        RETURN_IF_PYERROR();
+        // Microseconds since the epoch. Widen to 64 bits before scaling so
+        // the multiplication cannot overflow where `long` is 32 bits wide.
+        const int64_t seconds =
+            static_cast<int64_t>(lrint(difftime(mktime(&datetime), epoch_time)));
+        const int64_t val = seconds * 1000000 + us;
+        RETURN_NOT_OK(typed_builder_->Append(val));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Appends a Python sequence of floats to a DoubleBuilder.
+class DoubleConverter : public TypedConverter<DoubleBuilder> {
+ public:
+  // None becomes null; other items are converted with PyFloat_AsDouble.
+  // Conversion errors, item lookup errors and builder failures are returned
+  // to the caller.
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with an error set on failure.
+      RETURN_IF_PYERROR();
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        const double val = PyFloat_AsDouble(item.obj());
+        RETURN_IF_PYERROR();
+        RETURN_NOT_OK(typed_builder_->Append(val));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Appends a Python sequence of bytes/unicode objects to a BinaryBuilder.
+class BytesConverter : public TypedConverter<BinaryBuilder> {
+ public:
+  // None becomes null, unicode items are encoded as UTF-8 and bytes items
+  // are appended as-is. Any other item type yields TypeError; Python errors
+  // and builder failures are returned to the caller.
+  Status AppendData(PyObject* seq) override {
+    // Owns the temporary UTF-8 encoding of a unicode item while its bytes
+    // are being appended.
+    OwnedRef tmp;
+    Py_ssize_t size = PySequence_Size(seq);
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef holder(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with an error set on failure.
+      RETURN_IF_PYERROR();
+      PyObject* item = holder.obj();
+      PyObject* bytes_obj;
+
+      if (item == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+        continue;
+      } else if (PyUnicode_Check(item)) {
+        tmp.reset(PyUnicode_AsUTF8String(item));
+        RETURN_IF_PYERROR();
+        bytes_obj = tmp.obj();
+      } else if (PyBytes_Check(item)) {
+        bytes_obj = item;
+      } else {
+        return Status::TypeError("Non-string value encountered");
+      }
+      // bytes_obj is known to be a bytes object here, so the unchecked
+      // accessor macros are safe.
+      const int64_t length = PyBytes_GET_SIZE(bytes_obj);
+      const char* bytes = PyBytes_AS_STRING(bytes_obj);
+      RETURN_NOT_OK(typed_builder_->Append(bytes, length));
+    }
+    return Status::OK();
+  }
+};
+
+// Appends a Python sequence of unicode objects to a StringBuilder as UTF-8.
+class UTF8Converter : public TypedConverter<StringBuilder> {
+ public:
+  // None becomes null and unicode items are encoded as UTF-8. Any other item
+  // type yields TypeError; Python errors and builder failures are returned
+  // to the caller.
+  Status AppendData(PyObject* seq) override {
+    // Owns the temporary UTF-8 encoding of the current item while its bytes
+    // are being appended.
+    OwnedRef tmp;
+    Py_ssize_t size = PySequence_Size(seq);
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef holder(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with an error set on failure.
+      RETURN_IF_PYERROR();
+      PyObject* item = holder.obj();
+
+      if (item == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+        continue;
+      } else if (!PyUnicode_Check(item)) {
+        return Status::TypeError("Non-unicode value encountered");
+      }
+      tmp.reset(PyUnicode_AsUTF8String(item));
+      RETURN_IF_PYERROR();
+      PyObject* bytes_obj = tmp.obj();
+
+      // bytes_obj is a freshly encoded bytes object, so the unchecked
+      // accessor macros are safe.
+      const int64_t length = PyBytes_GET_SIZE(bytes_obj);
+      const char* bytes = PyBytes_AS_STRING(bytes_obj);
+      RETURN_NOT_OK(typed_builder_->Append(bytes, length));
+    }
+    return Status::OK();
+  }
+};
+
+// Appends a Python sequence of sequences to a ListBuilder, delegating the
+// items of each inner sequence to a converter for the list's value type.
+class ListConverter : public TypedConverter<ListBuilder> {
+ public:
+  // Binds the builder and creates the converter for the value type.
+  Status Init(const std::shared_ptr<ArrayBuilder>& builder) override;
+
+  // None becomes a null list; any other item starts a new list slot and is
+  // handed to the value converter. Item lookup errors, builder failures and
+  // value converter errors are returned to the caller.
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with an error set on failure.
+      RETURN_IF_PYERROR();
+      if (item.obj() == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        RETURN_NOT_OK(typed_builder_->Append());
+        RETURN_NOT_OK(value_converter_->AppendData(item.obj()));
+      }
+    }
+    return Status::OK();
+  }
+
+ protected:
+  std::shared_ptr<SeqConverter> value_converter_;
+};
+
+// Dynamic constructor for sequence converters
+//
+// Returns a fresh, un-initialized converter for the given Arrow type, or
+// nullptr when no converter exists for it (structs included).
+std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
+  switch (type->type) {
+    case Type::BOOL:
+      return std::make_shared<BoolConverter>();
+    case Type::INT64:
+      return std::make_shared<Int64Converter>();
+    case Type::DATE64:
+      return std::make_shared<DateConverter>();
+    case Type::TIMESTAMP:
+      return std::make_shared<TimestampConverter>();
+    case Type::DOUBLE:
+      return std::make_shared<DoubleConverter>();
+    case Type::BINARY:
+      return std::make_shared<BytesConverter>();
+    case Type::STRING:
+      return std::make_shared<UTF8Converter>();
+    case Type::LIST:
+      return std::make_shared<ListConverter>();
+    // Not supported
+    case Type::STRUCT:
+    default:
+      return nullptr;
+  }
+}
+
+// Bind this converter to a ListBuilder and set up the converter for the
+// list's value type. Returns NotImplemented when the value type has no
+// converter, otherwise the status of initializing the value converter.
+Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
+  builder_ = builder;
+  typed_builder_ = static_cast<ListBuilder*>(builder.get());
+
+  value_converter_ =
+      GetConverter(static_cast<ListType*>(builder->type().get())->value_type());
+  if (value_converter_ == nullptr) {
+    return Status::NotImplemented("value type not implemented");
+  }
+
+  // The value converter may itself be a ListConverter whose Init can fail,
+  // so its status is propagated rather than dropped.
+  return value_converter_->Init(typed_builder_->value_builder());
+}
+
+// Append the items of the Python sequence obj to builder, converting them as
+// values of the given Arrow type. Returns NotImplemented when the type has no
+// converter, and forwards any initialization or conversion error.
+Status AppendPySequence(PyObject* obj, const std::shared_ptr<DataType>& type,
+    const std::shared_ptr<ArrayBuilder>& builder) {
+  std::shared_ptr<SeqConverter> converter = GetConverter(type);
+  if (converter == nullptr) {
+    std::stringstream ss;
+    ss << "No type converter implemented for " << type->ToString();
+    return Status::NotImplemented(ss.str());
+  }
+  // Init fails for list types whose value type is unsupported; do not try to
+  // append through a half-initialized converter.
+  RETURN_NOT_OK(converter->Init(builder));
+
+  return converter->AppendData(obj);
+}
+
+// Convert the Python sequence obj into an Arrow array stored in *out. The
+// type is inferred from the items; builder memory is allocated from pool.
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out) {
+  // The datetime C API must be imported before the date/datetime type checks
+  // used during inference.
+  PyDateTime_IMPORT;
+
+  int64_t length;
+  std::shared_ptr<DataType> inferred_type;
+  RETURN_NOT_OK(InferArrowType(obj, &length, &inferred_type));
+
+  // Handle NA / NullType case
+  if (inferred_type->type == Type::NA) {
+    *out = std::make_shared<NullArray>(length);
+    return Status::OK();
+  }
+
+  // Give the sequence converter an array builder
+  std::shared_ptr<ArrayBuilder> array_builder;
+  RETURN_NOT_OK(MakeBuilder(pool, inferred_type, &array_builder));
+  RETURN_NOT_OK(AppendPySequence(obj, inferred_type, array_builder));
+
+  return array_builder->Finish(out);
+}
+
+}  // namespace py
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/builtin_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h
new file mode 100644
index 0000000..7b50990
--- /dev/null
+++ b/cpp/src/arrow/python/builtin_convert.h
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Functions for converting between CPython built-in data structures and Arrow
+// data structures
+
+#ifndef ARROW_PYTHON_ADAPTERS_BUILTIN_H
+#define ARROW_PYTHON_ADAPTERS_BUILTIN_H
+
+#include <Python.h>
+
+#include <memory>
+
+#include <arrow/type.h>
+
+#include "arrow/util/visibility.h"
+
+#include "arrow/python/common.h"
+
+namespace arrow {
+
+class Array;
+class Status;
+
+namespace py {
+
+// Infer an Arrow type for the items of the Python sequence obj. On success
+// *size holds the sequence length and *out_type the inferred type. Returns
+// TypeError when obj is not a sequence or no type can be determined.
+ARROW_EXPORT arrow::Status InferArrowType(
+    PyObject* obj, int64_t* size, std::shared_ptr<arrow::DataType>* out_type);
+
+// Append the items of the Python sequence obj to builder, converting them as
+// values of the given type. Returns NotImplemented when no converter exists
+// for the type.
+ARROW_EXPORT arrow::Status AppendPySequence(PyObject* obj,
+    const std::shared_ptr<arrow::DataType>& type,
+    const std::shared_ptr<arrow::ArrayBuilder>& builder);
+
+// Convert the Python sequence obj into an Arrow array stored in *out, with
+// the type inferred from the items and builder memory taken from pool.
+ARROW_EXPORT
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out);
+
+}  // namespace py
+}  // namespace arrow
+
+#endif  // ARROW_PYTHON_ADAPTERS_BUILTIN_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/common.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
new file mode 100644
index 0000000..a5aea30
--- /dev/null
+++ b/cpp/src/arrow/python/common.cc
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/common.h"
+
+#include <cstdlib>
+#include <mutex>
+#include <sstream>
+
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+
+namespace arrow {
+namespace py {
+
+// Guards default_python_pool.
+static std::mutex memory_pool_mutex;
+// Pool installed through set_default_memory_pool, or nullptr if none was set.
+static MemoryPool* default_python_pool = nullptr;
+
+// Install pool as the pool returned by get_memory_pool; passing nullptr
+// restores the fallback to Arrow's default pool.
+void set_default_memory_pool(MemoryPool* pool) {
+  std::lock_guard<std::mutex> lock(memory_pool_mutex);
+  default_python_pool = pool;
+}
+
+// Return the installed pool, or Arrow's default memory pool when none has
+// been installed.
+MemoryPool* get_memory_pool() {
+  std::lock_guard<std::mutex> lock(memory_pool_mutex);
+  return default_python_pool ? default_python_pool : default_memory_pool();
+}
+
+// ----------------------------------------------------------------------
+// PyBuffer
+
+// Wrap a Python object that supports the buffer protocol as a read-only
+// Arrow buffer. For any other object the buffer stays empty (null data,
+// zero size) and holds no Python reference. The GIL must be held.
+PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) {
+  if (PyObject_CheckBuffer(obj)) {
+    // PyMemoryView_FromObject returns a new reference; this object owns it
+    // until the destructor runs, so no additional Py_INCREF is taken.
+    obj_ = PyMemoryView_FromObject(obj);
+    // On failure the Python error is left set for the caller and the buffer
+    // stays empty.
+    if (obj_ == nullptr) { return; }
+    Py_buffer* buffer = PyMemoryView_GET_BUFFER(obj_);
+    data_ = reinterpret_cast<const uint8_t*>(buffer->buf);
+    size_ = buffer->len;
+    capacity_ = buffer->len;
+    is_mutable_ = false;
+  }
+}
+
+// Drop the memoryview reference, if one was taken. The GIL is acquired here
+// because the buffer may be destroyed from a thread that does not hold it.
+PyBuffer::~PyBuffer() {
+  PyAcquireGIL lock;
+  Py_XDECREF(obj_);
+}
+
+}  // namespace py
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
new file mode 100644
index 0000000..f1be471
--- /dev/null
+++ b/cpp/src/arrow/python/common.h
@@ -0,0 +1,139 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_PYTHON_COMMON_H
+#define ARROW_PYTHON_COMMON_H
+
+#include <string>
+
+#include "arrow/python/config.h"
+
+#include "arrow/buffer.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace py {
+
+// Scope guard for the GIL: calls PyGILState_Ensure on construction and
+// PyGILState_Release with the saved state on destruction.
+class PyAcquireGIL {
+ public:
+  PyAcquireGIL() : state_(PyGILState_Ensure()) {}
+
+  ~PyAcquireGIL() { PyGILState_Release(state_); }
+
+ private:
+  // State handed back to PyGILState_Release.
+  PyGILState_STATE state_;
+  DISALLOW_COPY_AND_ASSIGN(PyAcquireGIL);
+};
+
+// True when building against Python 2. Parenthesized so the comparison stays
+// intact when the macro is used inside a larger expression.
+#define PYARROW_IS_PY2 (PY_MAJOR_VERSION <= 2)
+
+// Holds one owned reference to a Python object and drops it on destruction.
+// NOTE(review): the class is copyable, and a copy would release the same
+// reference twice -- confirm that no caller copies it.
+class OwnedRef {
+ public:
+  OwnedRef() : obj_(nullptr) {}
+
+  // Takes over the reference held by obj (which may be null).
+  explicit OwnedRef(PyObject* obj) : obj_(obj) {}
+
+  // Acquires the GIL before releasing the reference.
+  ~OwnedRef() {
+    PyAcquireGIL lock;
+    Py_XDECREF(obj_);
+  }
+
+  // Release the current reference, if any, and take over obj. Unlike the
+  // destructor this does not acquire the GIL itself.
+  void reset(PyObject* obj) {
+    Py_XDECREF(obj_);
+    obj_ = obj;
+  }
+
+  // Give up ownership without touching the reference count.
+  void release() { obj_ = nullptr; }
+
+  // Borrowed pointer to the held object (may be null).
+  PyObject* obj() const { return obj_; }
+
+ private:
+  PyObject* obj_;
+};
+
+// Exposes the contents of a Python unicode or bytes object as a C string.
+// Unicode input is encoded to UTF-8 into a temporary bytes object owned by
+// tmp_obj; other input is read in place, so `bytes` is only valid while both
+// this struct and the source object are alive. `bytes` is null when the
+// encoding fails or PyBytes_AsString cannot read the object.
+struct PyObjectStringify {
+  OwnedRef tmp_obj;
+  const char* bytes;
+
+  explicit PyObjectStringify(PyObject* obj) : bytes(nullptr) {
+    PyObject* bytes_obj;
+    if (PyUnicode_Check(obj)) {
+      bytes_obj = PyUnicode_AsUTF8String(obj);
+      // Encoding can fail; leave bytes null instead of handing NULL to
+      // PyBytes_AsString.
+      if (bytes_obj == nullptr) { return; }
+      tmp_obj.reset(bytes_obj);
+    } else {
+      bytes_obj = obj;
+    }
+    bytes = PyBytes_AsString(bytes_obj);
+  }
+};
+
+// TODO(wesm): We can just let errors pass through. To be explored later
+//
+// If a Python exception is pending, clear it and return from the enclosing
+// function with Status::UnknownError carrying str(exception value). The
+// value fetched by PyErr_Fetch may be NULL or an exception instance rather
+// than a string, so it is converted with PyObject_Str and every step is
+// null-checked; the message is empty when no text can be obtained.
+#define RETURN_IF_PYERROR()                                  \
+  if (PyErr_Occurred()) {                                    \
+    PyObject *exc_type, *exc_value, *traceback;              \
+    PyErr_Fetch(&exc_type, &exc_value, &traceback);          \
+    std::string message;                                     \
+    if (exc_value != nullptr) {                              \
+      OwnedRef exc_str(PyObject_Str(exc_value));             \
+      if (exc_str.obj() != nullptr) {                        \
+        PyObjectStringify stringified(exc_str.obj());        \
+        if (stringified.bytes != nullptr) {                  \
+          message = stringified.bytes;                       \
+        }                                                    \
+      }                                                      \
+    }                                                        \
+    Py_XDECREF(exc_type);                                    \
+    Py_XDECREF(exc_value);                                   \
+    Py_XDECREF(traceback);                                   \
+    PyErr_Clear();                                           \
+    return Status::UnknownError(message);                    \
+  }
+
+// Set / return the common PyArrow memory pool. get_memory_pool() returns the
+// pool passed to set_default_memory_pool(), or Arrow's default memory pool
+// when none (or nullptr) has been set.
+ARROW_EXPORT void set_default_memory_pool(MemoryPool* pool);
+ARROW_EXPORT MemoryPool* get_memory_pool();
+
+// Arrow buffer that exposes the memory of a NumPy array and keeps the array
+// alive by holding a reference to it.
+class ARROW_EXPORT NumPyBuffer : public Buffer {
+ public:
+  // Takes a new reference to arr; size and capacity are the element count
+  // times the element size of the array's dtype.
+  explicit NumPyBuffer(PyArrayObject* arr) : Buffer(nullptr, 0), arr_(arr) {
+    Py_INCREF(arr_);
+
+    data_ = reinterpret_cast<const uint8_t*>(PyArray_DATA(arr_));
+    size_ = PyArray_SIZE(arr_) * PyArray_DESCR(arr_)->elsize;
+    capacity_ = size_;
+  }
+
+  // Drops the reference taken in the constructor.
+  virtual ~NumPyBuffer() { Py_XDECREF(arr_); }
+
+ private:
+  PyArrayObject* arr_;
+};
+
+// Arrow buffer backed by a Python object that supports the buffer protocol.
+class ARROW_EXPORT PyBuffer : public Buffer {
+ public:
+  /// Note that the GIL must be held when calling the PyBuffer constructor.
+  ///
+  /// While memoryview objects support multi-dimensional buffers, PyBuffer only
+  /// supports one-dimensional byte buffers.
+  explicit PyBuffer(PyObject* obj);
+  ~PyBuffer();
+
+ private:
+  // memoryview created over the wrapped object by the constructor
+  PyObject* obj_;
+};
+
+}  // namespace py
+}  // namespace arrow
+
+#endif  // ARROW_PYTHON_COMMON_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/config.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/config.cc b/cpp/src/arrow/python/config.cc
new file mode 100644
index 0000000..2abc4dd
--- /dev/null
+++ b/cpp/src/arrow/python/config.cc
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <Python.h>
+
+#include "arrow/python/config.h"
+
+namespace arrow {
+namespace py {
+
+// Library initialization hook; currently does nothing.
+void Init() {}
+
+// Object registered through set_numpy_nan, or nullptr if none has been set.
+PyObject* numpy_nan = nullptr;
+
+// Register obj as numpy_nan, taking a new reference to it and releasing the
+// reference to any previously registered object. obj must not be null.
+void set_numpy_nan(PyObject* obj) {
+  // Take the new reference first so re-registering the same object is safe.
+  Py_INCREF(obj);
+  PyObject* previous = numpy_nan;
+  numpy_nan = obj;
+  Py_XDECREF(previous);
+}
+
+}  // namespace py
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/config.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/config.h b/cpp/src/arrow/python/config.h
new file mode 100644
index 0000000..dd554e0
--- /dev/null
+++ b/cpp/src/arrow/python/config.h
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_PYTHON_CONFIG_H
+#define ARROW_PYTHON_CONFIG_H
+
+#include <Python.h>
+
+#include "arrow/python/numpy_interop.h"
+#include "arrow/util/visibility.h"
+
+// On Python 3, PyString_Check is defined here as an alias for
+// PyUnicode_Check so that code written against the Python 2 name compiles.
+#if PY_MAJOR_VERSION >= 3
+#define PyString_Check PyUnicode_Check
+#endif
+
+namespace arrow {
+namespace py {
+
+// Object registered through set_numpy_nan(); defined in config.cc, where it
+// starts out as nullptr.
+ARROW_EXPORT
+extern PyObject* numpy_nan;
+
+// Library initialization hook (the definition in config.cc is empty).
+ARROW_EXPORT
+void Init();
+
+// Store obj in numpy_nan, taking a new reference to it.
+ARROW_EXPORT
+void set_numpy_nan(PyObject* obj);
+
+}  // namespace py
+}  // namespace arrow
+
+#endif  // ARROW_PYTHON_CONFIG_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/do_import_numpy.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/do_import_numpy.h b/cpp/src/arrow/python/do_import_numpy.h
new file mode 100644
index 0000000..bb4a382
--- /dev/null
+++ b/cpp/src/arrow/python/do_import_numpy.h
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Trick borrowed from dynd-python for initializing the NumPy array API
+
+// Trigger the array import (inversion of NO_IMPORT_ARRAY).
+//
+// arrow/python/numpy_interop.h defines NO_IMPORT_ARRAY unless this macro is
+// already defined, and only emits the import_array1/import_umath1 calls inside
+// import_numpy() when it is. This header therefore has to be included before
+// numpy_interop.h in the translation unit that should perform the import.
+#define NUMPY_IMPORT_ARRAY

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/helpers.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
new file mode 100644
index 0000000..add2d9a
--- /dev/null
+++ b/cpp/src/arrow/python/helpers.cc
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/helpers.h"
+
+#include <arrow/api.h>
+
+namespace arrow {
+namespace py {
+
+// Expands to a single switch case that returns the DataType produced by the
+// zero-argument factory FACTORY for the enum value Type::NAME. The expansion
+// is a complete statement, so use sites take no trailing semicolon.
+#define GET_PRIMITIVE_TYPE(NAME, FACTORY) \
+  case Type::NAME:                        \
+    return FACTORY();
+
+// Map a type id to the DataType instance returned by the matching factory
+// function (null(), uint8(), ..., utf8()).
+//
+// Returns nullptr for any type id that is not listed below.
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
+  switch (type) {
+    GET_PRIMITIVE_TYPE(NA, null)
+    GET_PRIMITIVE_TYPE(UINT8, uint8)
+    GET_PRIMITIVE_TYPE(INT8, int8)
+    GET_PRIMITIVE_TYPE(UINT16, uint16)
+    GET_PRIMITIVE_TYPE(INT16, int16)
+    GET_PRIMITIVE_TYPE(UINT32, uint32)
+    GET_PRIMITIVE_TYPE(INT32, int32)
+    GET_PRIMITIVE_TYPE(UINT64, uint64)
+    GET_PRIMITIVE_TYPE(INT64, int64)
+    GET_PRIMITIVE_TYPE(DATE32, date32)
+    GET_PRIMITIVE_TYPE(DATE64, date64)
+    GET_PRIMITIVE_TYPE(BOOL, boolean)
+    GET_PRIMITIVE_TYPE(FLOAT, float32)
+    GET_PRIMITIVE_TYPE(DOUBLE, float64)
+    GET_PRIMITIVE_TYPE(BINARY, binary)
+    GET_PRIMITIVE_TYPE(STRING, utf8)
+    default:
+      return nullptr;
+  }
+}
+
+// The helper macro is only meaningful inside the switch above.
+#undef GET_PRIMITIVE_TYPE
+
+}  // namespace py
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/helpers.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
new file mode 100644
index 0000000..611e814
--- /dev/null
+++ b/cpp/src/arrow/python/helpers.h
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_HELPERS_H
+#define PYARROW_HELPERS_H
+
+#include <memory>
+
+#include "arrow/type.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace py {
+
+// Return the DataType instance corresponding to a primitive type id.
+// The implementation in helpers.cc returns nullptr for type ids it does not
+// handle, so callers must check the result before dereferencing it.
+ARROW_EXPORT
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
+
+}  // namespace py
+}  // namespace arrow
+
+#endif  // PYARROW_HELPERS_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/io.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc
new file mode 100644
index 0000000..ba82a45
--- /dev/null
+++ b/cpp/src/arrow/python/io.cc
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/io.h"
+
+#include <cstdint>
+#include <cstdlib>
+#include <string>
+
+#include "arrow/io/memory.h"
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+
+#include "arrow/python/common.h"
+
+namespace arrow {
+namespace py {
+
+// ----------------------------------------------------------------------
+// Python file
+
+// Wrap a Python file-like object, taking a new reference to it so that it
+// stays alive for the lifetime of this wrapper.
+PythonFile::PythonFile(PyObject* file) : file_(file) {
+  Py_INCREF(file);
+}
+
+// Release the reference taken by the constructor.
+PythonFile::~PythonFile() {
+  Py_DECREF(file_);
+}
+
+// Translate a pending Python exception, if any, into an arrow Status.
+//
+// Returns Status::OK() when no Python error is set. Otherwise fetches the
+// exception, uses its stringified value as the message of a Status::IOError,
+// releases the fetched references and leaves no Python error pending.
+static Status CheckPyError() {
+  if (!PyErr_Occurred()) { return Status::OK(); }
+
+  PyObject *exc_type, *exc_value, *traceback;
+  PyErr_Fetch(&exc_type, &exc_value, &traceback);
+
+  // The fetched value may be null, and stringification may fail to produce a
+  // C string. Constructing a std::string from a null char* is undefined
+  // behavior, so fall back to a fixed message in those cases.
+  std::string message("Unknown Python error");
+  if (exc_value != nullptr) {
+    PyObjectStringify stringified(exc_value);
+    if (stringified.bytes != nullptr) { message = stringified.bytes; }
+  }
+
+  Py_XDECREF(exc_type);
+  Py_XDECREF(exc_value);
+  Py_XDECREF(traceback);
+
+  // Stringifying the value may itself have raised; make sure nothing is left
+  // set on the interpreter before handing control back.
+  PyErr_Clear();
+  return Status::IOError(message);
+}
+
+// PyObject_CallMethod takes its method name and format string as non-const
+// char*, but C++11 does not allow string literals to convert implicitly to
+// non-const char*. This wrapper accepts const char* and does the casts in one
+// place so call sites can pass literals directly.
+template <typename... ArgTypes>
+static inline PyObject* cpp_PyObject_CallMethod(
+    PyObject* obj, const char* method_name, const char* argspec, ArgTypes... args) {
+  char* mutable_name = const_cast<char*>(method_name);
+  char* mutable_spec = const_cast<char*>(argspec);
+  return PyObject_CallMethod(obj, mutable_name, mutable_spec, args...);
+}
+
+// Call close() on the wrapped Python object. Any Python exception raised by
+// the call is reported through the returned Status.
+Status PythonFile::Close() {
+  PyObject* ret = cpp_PyObject_CallMethod(file_, "close", "()");
+  Py_XDECREF(ret);
+  return CheckPyError();
+}
+
+// Call seek(position, whence) on the wrapped Python object.
+//
+// whence: 0 for relative to start of file, 2 for end of file.
+// Any Python exception raised by the call is reported through the returned
+// Status.
+Status PythonFile::Seek(int64_t position, int whence) {
+  // The "i" format unit reads a C int from the varargs, which does not match
+  // an int64_t argument and truncates offsets beyond the int range. "L" reads
+  // a long long, so pass the position as exactly that type.
+  PyObject* result = cpp_PyObject_CallMethod(file_, "seek", "(Li)",
+      static_cast<long long>(position), whence);  // NOLINT(runtime/int)
+  Py_XDECREF(result);
+  ARROW_RETURN_NOT_OK(CheckPyError());
+  return Status::OK();
+}
+
+// Call read(nbytes) on the wrapped Python object.
+//
+// On success *out receives the object returned by read(); the caller takes
+// over that reference. If the call raised, an error Status is returned and
+// *out is left untouched.
+Status PythonFile::Read(int64_t nbytes, PyObject** out) {
+  // The "i" format unit reads a C int from the varargs, which does not match
+  // an int64_t argument and truncates large sizes. "L" reads a long long, so
+  // pass the size as exactly that type.
+  PyObject* result = cpp_PyObject_CallMethod(
+      file_, "read", "(L)", static_cast<long long>(nbytes));  // NOLINT(runtime/int)
+  ARROW_RETURN_NOT_OK(CheckPyError());
+  *out = result;
+  return Status::OK();
+}
+
+// Copy `nbytes` bytes starting at `data` into a Python bytes object and pass
+// it to write() on the wrapped Python object. Failures in either step are
+// reported through the returned Status.
+Status PythonFile::Write(const uint8_t* data, int64_t nbytes) {
+  const char* raw = reinterpret_cast<const char*>(data);
+  PyObject* payload = PyBytes_FromStringAndSize(raw, nbytes);
+  ARROW_RETURN_NOT_OK(CheckPyError());
+
+  PyObject* ret = cpp_PyObject_CallMethod(file_, "write", "(O)", payload);
+  // Both temporaries are released before the error check so that nothing
+  // leaks when write() raised.
+  Py_XDECREF(payload);
+  Py_XDECREF(ret);
+  return CheckPyError();
+}
+
+// Call tell() on the wrapped Python object and store the result, converted
+// with PyLong_AsLongLong, in *position.
+Status PythonFile::Tell(int64_t* position) {
+  PyObject* py_offset = cpp_PyObject_CallMethod(file_, "tell", "()");
+  ARROW_RETURN_NOT_OK(CheckPyError());
+
+  const auto offset = PyLong_AsLongLong(py_offset);
+  *position = offset;
+  Py_DECREF(py_offset);
+
+  // The integer conversion can raise OverflowError; surface it as a Status.
+  return CheckPyError();
+}
+
+// ----------------------------------------------------------------------
+// Seekable input stream
+
+// Create the PythonFile wrapper that every method of this class forwards to.
+PyReadableFile::PyReadableFile(PyObject* file) : file_(new PythonFile(file)) {}
+
+// Nothing to do explicitly: file_ releases the PythonFile wrapper.
+PyReadableFile::~PyReadableFile() {}
+
+// Close the underlying Python file while a PyAcquireGIL guard is in scope.
+Status PyReadableFile::Close() {
+  PyAcquireGIL gil;
+  return file_->Close();
+}
+
+// Move to an absolute position (whence == 0, i.e. relative to the start of
+// the file) while a PyAcquireGIL guard is in scope.
+Status PyReadableFile::Seek(int64_t position) {
+  PyAcquireGIL gil;
+  const int from_start = 0;
+  return file_->Seek(position, from_start);
+}
+
+// Report the current position of the underlying Python file while a
+// PyAcquireGIL guard is in scope.
+Status PyReadableFile::Tell(int64_t* position) {
+  PyAcquireGIL gil;
+  return file_->Tell(position);
+}
+
+// Read up to `nbytes` bytes from the Python file into the caller's buffer.
+//
+// nbytes:     number of bytes requested; `out` must have room for this many.
+// bytes_read: receives the number of bytes actually copied.
+// out:        destination buffer.
+//
+// Returns an IOError Status if the Python read() call fails, if it returns
+// something other than a bytes object, or if it returns more data than was
+// requested (which would otherwise overrun `out`).
+Status PyReadableFile::Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) {
+  PyAcquireGIL lock;
+  PyObject* bytes_obj = nullptr;
+  ARROW_RETURN_NOT_OK(file_->Read(nbytes, &bytes_obj));
+
+  // PyBytes_GET_SIZE / PyBytes_AS_STRING perform no type checking, so verify
+  // the object really is bytes before touching its storage.
+  if (bytes_obj == nullptr || !PyBytes_Check(bytes_obj)) {
+    Py_XDECREF(bytes_obj);
+    return Status::IOError("Python file read() did not return a bytes object");
+  }
+
+  const int64_t size = PyBytes_GET_SIZE(bytes_obj);
+  if (size > nbytes) {
+    Py_DECREF(bytes_obj);
+    return Status::IOError("Python file read() returned more bytes than requested");
+  }
+
+  *bytes_read = size;
+  std::memcpy(out, PyBytes_AS_STRING(bytes_obj), static_cast<size_t>(size));
+  Py_DECREF(bytes_obj);
+
+  return Status::OK();
+}
+
+// Read up to `nbytes` bytes from the Python file and hand them back wrapped
+// in a PyBuffer.
+Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr<Buffer>* out) {
+  PyAcquireGIL gil;
+
+  PyObject* py_bytes;
+  ARROW_RETURN_NOT_OK(file_->Read(nbytes, &py_bytes));
+
+  // Our own reference is dropped right after wrapping.
+  // NOTE(review): presumably PyBuffer keeps the object alive itself -- confirm
+  // in its definition.
+  std::shared_ptr<PyBuffer> wrapped = std::make_shared<PyBuffer>(py_bytes);
+  *out = wrapped;
+  Py_DECREF(py_bytes);
+
+  return Status::OK();
+}
+
+// Determine the file size by seeking to the end and asking for the position,
+// then seeking back to where the file was before the call. If any step fails
+// its Status is returned immediately and *size is left untouched.
+Status PyReadableFile::GetSize(int64_t* size) {
+  PyAcquireGIL gil;
+
+  // Remember the current offset so it can be restored afterwards.
+  int64_t saved_offset;
+  ARROW_RETURN_NOT_OK(file_->Tell(&saved_offset));
+
+  // whence == 2: offset is relative to the end of the file.
+  ARROW_RETURN_NOT_OK(file_->Seek(0, 2));
+
+  int64_t end_offset;
+  ARROW_RETURN_NOT_OK(file_->Tell(&end_offset));
+
+  // whence == 0: offset is relative to the start of the file.
+  ARROW_RETURN_NOT_OK(file_->Seek(saved_offset, 0));
+
+  *size = end_offset;
+  return Status::OK();
+}
+
+// Always reports false for Python-file-backed readers.
+bool PyReadableFile::supports_zero_copy() const {
+  return false;
+}
+
+// ----------------------------------------------------------------------
+// Output stream
+
+// Create the PythonFile wrapper that every method of this class forwards to.
+PyOutputStream::PyOutputStream(PyObject* file) : file_(new PythonFile(file)) {}
+
+// Nothing to do explicitly: file_ releases the PythonFile wrapper.
+PyOutputStream::~PyOutputStream() {}
+
+// Close the underlying Python file while a PyAcquireGIL guard is in scope.
+Status PyOutputStream::Close() {
+  PyAcquireGIL gil;
+  return file_->Close();
+}
+
+// Report the current position of the underlying Python file while a
+// PyAcquireGIL guard is in scope.
+Status PyOutputStream::Tell(int64_t* position) {
+  PyAcquireGIL gil;
+  return file_->Tell(position);
+}
+
+// Write `nbytes` bytes starting at `data` to the underlying Python file while
+// a PyAcquireGIL guard is in scope.
+Status PyOutputStream::Write(const uint8_t* data, int64_t nbytes) {
+  PyAcquireGIL gil;
+  return file_->Write(data, nbytes);
+}
+
+// ----------------------------------------------------------------------
+// A readable file that is backed by a PyBuffer
+
+// Wrap `obj` in a PyBuffer and hand that buffer to the io::BufferReader base
+// class, which provides all of the reading behavior.
+// NOTE(review): which Python object types PyBuffer accepts is not visible
+// here -- confirm in its definition.
+PyBytesReader::PyBytesReader(PyObject* obj)
+    : io::BufferReader(std::make_shared<PyBuffer>(obj)) {}
+
+// No resources of its own to release.
+PyBytesReader::~PyBytesReader() {}
+
+}  // namespace py
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/io.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/io.h b/cpp/src/arrow/python/io.h
new file mode 100644
index 0000000..905bd6c
--- /dev/null
+++ b/cpp/src/arrow/python/io.h
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_IO_H
+#define PYARROW_IO_H
+
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/visibility.h"
+
+#include "arrow/python/config.h"
+
+#include "arrow/python/common.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace py {
+
+// A common interface to a Python file-like object. Must acquire GIL before
+// calling any methods.
+//
+// The wrapper holds one reference to the Python object: the constructor
+// increments its reference count and the destructor decrements it. Copying is
+// disabled because a copy would share the pointer without taking its own
+// reference, and both destructors would then release it.
+class PythonFile {
+ public:
+  explicit PythonFile(PyObject* file);
+  ~PythonFile();
+
+  PythonFile(const PythonFile&) = delete;
+  PythonFile& operator=(const PythonFile&) = delete;
+
+  // Calls close() on the Python object.
+  Status Close();
+  // Calls seek(position, whence); whence is 0 for relative to the start of
+  // the file and 2 for relative to the end.
+  Status Seek(int64_t position, int whence);
+  // Calls read(nbytes); on success *out receives the returned object and the
+  // caller is responsible for releasing it.
+  Status Read(int64_t nbytes, PyObject** out);
+  // Calls tell() and stores the result in *position.
+  Status Tell(int64_t* position);
+  // Calls write() with a bytes object built from data[0, nbytes).
+  Status Write(const uint8_t* data, int64_t nbytes);
+
+ private:
+  PyObject* file_;
+};
+
+// An io::RandomAccessFile whose operations are forwarded to a Python
+// file-like object through a PythonFile wrapper. The implementations in io.cc
+// construct a PyAcquireGIL guard before touching the Python object.
+class ARROW_EXPORT PyReadableFile : public io::RandomAccessFile {
+ public:
+  explicit PyReadableFile(PyObject* file);
+  virtual ~PyReadableFile();
+
+  Status Close() override;
+
+  // Copies the bytes returned by the Python read() call into `out` and stores
+  // their count in *bytes_read.
+  Status Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) override;
+  // Returns the bytes object produced by the Python read() call wrapped in a
+  // PyBuffer.
+  Status Read(int64_t nbytes, std::shared_ptr<Buffer>* out) override;
+
+  // Computed by seeking to the end of the file and back to the prior offset.
+  Status GetSize(int64_t* size) override;
+
+  // Seeks to an absolute position measured from the start of the file.
+  Status Seek(int64_t position) override;
+
+  Status Tell(int64_t* position) override;
+
+  // Always false for this class.
+  bool supports_zero_copy() const override;
+
+ private:
+  // Wrapper around the Python object all calls are forwarded to.
+  std::unique_ptr<PythonFile> file_;
+};
+
+// An io::OutputStream whose operations are forwarded to a Python file-like
+// object through a PythonFile wrapper. The implementations in io.cc construct
+// a PyAcquireGIL guard before touching the Python object.
+class ARROW_EXPORT PyOutputStream : public io::OutputStream {
+ public:
+  explicit PyOutputStream(PyObject* file);
+  virtual ~PyOutputStream();
+
+  Status Close() override;
+  Status Tell(int64_t* position) override;
+  // Passes data[0, nbytes) to the Python object's write() method.
+  Status Write(const uint8_t* data, int64_t nbytes) override;
+
+ private:
+  // Wrapper around the Python object all calls are forwarded to.
+  std::unique_ptr<PythonFile> file_;
+};
+
+// A zero-copy reader backed by a PyBuffer object.
+// The constructor wraps `obj` in a PyBuffer and passes it to io::BufferReader,
+// which supplies the reading interface.
+class ARROW_EXPORT PyBytesReader : public io::BufferReader {
+ public:
+  explicit PyBytesReader(PyObject* obj);
+  virtual ~PyBytesReader();
+};
+
+// TODO(wesm): seekable output files
+
+}  // namespace py
+}  // namespace arrow
+
+#endif  // PYARROW_IO_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/numpy_interop.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/numpy_interop.h b/cpp/src/arrow/python/numpy_interop.h
new file mode 100644
index 0000000..0a4b425
--- /dev/null
+++ b/cpp/src/arrow/python/numpy_interop.h
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_NUMPY_INTEROP_H
+#define PYARROW_NUMPY_INTEROP_H
+
+#include <Python.h>
+
+#include <numpy/numpyconfig.h>
+
+// Don't use the deprecated Numpy functions.
+// When the NumPy headers provide NPY_1_7_API_VERSION, request that API level
+// via NPY_NO_DEPRECATED_API. Otherwise define the NPY_ARRAY_* flag names used
+// by this code base in terms of the older un-prefixed names.
+#ifdef NPY_1_7_API_VERSION
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#else
+#define NPY_ARRAY_NOTSWAPPED NPY_NOTSWAPPED
+#define NPY_ARRAY_ALIGNED NPY_ALIGNED
+#define NPY_ARRAY_WRITEABLE NPY_WRITEABLE
+#define NPY_ARRAY_UPDATEIFCOPY NPY_UPDATEIFCOPY
+#endif
+
+// This is required to be able to access the NumPy C API properly from more
+// than one C++ file: every file uses the same PY_ARRAY_UNIQUE_SYMBOL, and all
+// of them define NO_IMPORT_ARRAY except a file that defined
+// NUMPY_IMPORT_ARRAY before including this header (see do_import_numpy.h).
+#define PY_ARRAY_UNIQUE_SYMBOL arrow_ARRAY_API
+#ifndef NUMPY_IMPORT_ARRAY
+#define NO_IMPORT_ARRAY
+#endif
+
+#include <numpy/arrayobject.h>
+#include <numpy/ufuncobject.h>
+
+namespace arrow {
+namespace py {
+
+// Initialize the NumPy array and ufunc C APIs.
+//
+// The import macros are only expanded when NUMPY_IMPORT_ARRAY was defined
+// before this header was included; otherwise this function does nothing.
+// Returns 0 when execution reaches the end of the function.
+// NOTE(review): import_array1 / import_umath1 are NumPy macros; the -1
+// argument is presumably the value they return from this function when the
+// import fails -- confirm against the NumPy C-API documentation.
+inline int import_numpy() {
+#ifdef NUMPY_IMPORT_ARRAY
+  import_array1(-1);
+  import_umath1(-1);
+#endif
+
+  return 0;
+}
+
+}  // namespace py
+}  // namespace arrow
+
+#endif  // PYARROW_NUMPY_INTEROP_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/cpp/src/arrow/python/pandas-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas-test.cc b/cpp/src/arrow/python/pandas-test.cc
new file mode 100644
index 0000000..ae2527e
--- /dev/null
+++ b/cpp/src/arrow/python/pandas-test.cc
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/python/pandas_convert.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+namespace arrow {
+namespace py {
+
+// Builds a three-column string table whose values are not valid UTF-8 and
+// checks that converting it to pandas reports an UnknownError.
+TEST(PandasConversionTest, TestObjectBlockWriteFails) {
+  StringBuilder builder(default_memory_pool());
+  // A lone 0xF1 byte is the start of a multi-byte UTF-8 sequence with no
+  // continuation bytes, i.e. an invalid UTF-8 string.
+  const char value[] = {'\xf1', '\0'};
+
+  for (int i = 0; i < 1000; ++i) {
+    // Do not silently ignore a failed append; the test would otherwise run
+    // against a shorter array than intended.
+    ASSERT_OK(builder.Append(value, strlen(value)));
+  }
+
+  std::shared_ptr<Array> arr;
+  ASSERT_OK(builder.Finish(&arr));
+
+  // The same array backs all three columns.
+  auto f1 = field("f1", utf8());
+  auto f2 = field("f2", utf8());
+  auto f3 = field("f3", utf8());
+  std::vector<std::shared_ptr<Field>> fields = {f1, f2, f3};
+  std::vector<std::shared_ptr<Column>> cols = {std::make_shared<Column>(f1, arr),
+      std::make_shared<Column>(f2, arr), std::make_shared<Column>(f3, arr)};
+
+  auto schema = std::make_shared<Schema>(fields);
+  auto table = std::make_shared<Table>("", schema, cols);
+
+  PyObject* out = nullptr;
+  Status convert_status;
+  // Keep the assertion outside of the ALLOW_THREADS region: a failing ASSERT_*
+  // returns from the test body, which would skip Py_END_ALLOW_THREADS and
+  // leave the thread state unrestored.
+  Py_BEGIN_ALLOW_THREADS;
+  convert_status = ConvertTableToPandas(table, 2, &out);
+  Py_END_ALLOW_THREADS;
+  ASSERT_RAISES(UnknownError, convert_status);
+}
+
+}  // namespace py
+}  // namespace arrow