Posted to commits@arrow.apache.org by jo...@apache.org on 2021/04/18 14:40:13 UTC

[arrow-rs] 03/14: Removed Python.

This is an automated email from the ASF dual-hosted git repository.

jorgecarleitao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git

commit 4d14b301cda523d363e17ee5d03a581675915a32
Author: Jorge C. Leitao <jo...@gmail.com>
AuthorDate: Sun Apr 18 14:19:46 2021 +0000

    Removed Python.
---
 python/.coveragerc                                 |   19 -
 python/.flake8.cython                              |   20 -
 python/.gitignore                                  |   45 -
 python/CMakeLists.txt                              |  619 ---
 python/MANIFEST.in                                 |   15 -
 python/README.md                                   |   59 -
 python/asv-build.sh                                |   75 -
 python/asv-install.sh                              |   21 -
 python/asv-uninstall.sh                            |   21 -
 python/asv.conf.json                               |  187 -
 python/benchmarks/__init__.py                      |   16 -
 python/benchmarks/array_ops.py                     |   34 -
 python/benchmarks/common.py                        |  349 --
 python/benchmarks/convert_builtins.py              |   87 -
 python/benchmarks/convert_pandas.py                |  121 -
 python/benchmarks/io.py                            |   89 -
 python/benchmarks/microbenchmarks.py               |   45 -
 python/benchmarks/parquet.py                       |  156 -
 python/benchmarks/plasma.py                        |   72 -
 python/benchmarks/streaming.py                     |   70 -
 python/cmake_modules                               |    1 -
 python/examples/flight/client.py                   |  189 -
 python/examples/flight/middleware.py               |  167 -
 python/examples/flight/server.py                   |  154 -
 python/examples/minimal_build/Dockerfile.fedora    |   31 -
 python/examples/minimal_build/Dockerfile.ubuntu    |   38 -
 python/examples/minimal_build/README.md            |   73 -
 python/examples/minimal_build/build_conda.sh       |  119 -
 python/examples/minimal_build/build_venv.sh        |   84 -
 python/examples/plasma/sorting/multimerge.pyx      |  102 -
 python/examples/plasma/sorting/setup.py            |   27 -
 python/examples/plasma/sorting/sort_df.py          |  203 -
 python/pyarrow/__init__.pxd                        |   42 -
 python/pyarrow/__init__.py                         |  504 ---
 python/pyarrow/_compute.pxd                        |   27 -
 python/pyarrow/_compute.pyx                        | 1092 -----
 python/pyarrow/_csv.pxd                            |   46 -
 python/pyarrow/_csv.pyx                            |  952 -----
 python/pyarrow/_cuda.pxd                           |   67 -
 python/pyarrow/_cuda.pyx                           | 1059 -----
 python/pyarrow/_dataset.pyx                        | 2977 -------------
 python/pyarrow/_flight.pyx                         | 2578 ------------
 python/pyarrow/_fs.pxd                             |   94 -
 python/pyarrow/_fs.pyx                             | 1088 -----
 python/pyarrow/_hdfs.pyx                           |  141 -
 python/pyarrow/_json.pyx                           |  249 --
 python/pyarrow/_orc.pxd                            |   53 -
 python/pyarrow/_orc.pyx                            |  111 -
 python/pyarrow/_parquet.pxd                        |  553 ---
 python/pyarrow/_parquet.pyx                        | 1435 -------
 python/pyarrow/_plasma.pyx                         |  868 ----
 python/pyarrow/_s3fs.pyx                           |  260 --
 python/pyarrow/array.pxi                           | 2387 -----------
 python/pyarrow/benchmark.pxi                       |   20 -
 python/pyarrow/benchmark.py                        |   21 -
 python/pyarrow/builder.pxi                         |   82 -
 python/pyarrow/cffi.py                             |   71 -
 python/pyarrow/compat.pxi                          |   65 -
 python/pyarrow/compat.py                           |   29 -
 python/pyarrow/compute.py                          |  493 ---
 python/pyarrow/config.pxi                          |   74 -
 python/pyarrow/csv.py                              |   22 -
 python/pyarrow/cuda.py                             |   25 -
 python/pyarrow/dataset.py                          |  779 ----
 python/pyarrow/error.pxi                           |  231 --
 python/pyarrow/feather.pxi                         |  105 -
 python/pyarrow/feather.py                          |  262 --
 python/pyarrow/filesystem.py                       |  511 ---
 python/pyarrow/flight.py                           |   63 -
 python/pyarrow/fs.py                               |  326 --
 python/pyarrow/gandiva.pyx                         |  482 ---
 python/pyarrow/hdfs.py                             |  240 --
 python/pyarrow/includes/__init__.pxd               |    0
 python/pyarrow/includes/common.pxd                 |  137 -
 python/pyarrow/includes/libarrow.pxd               | 2356 -----------
 python/pyarrow/includes/libarrow_cuda.pxd          |  107 -
 python/pyarrow/includes/libarrow_dataset.pxd       |  384 --
 python/pyarrow/includes/libarrow_flight.pxd        |  552 ---
 python/pyarrow/includes/libarrow_fs.pxd            |  268 --
 python/pyarrow/includes/libgandiva.pxd             |  281 --
 python/pyarrow/includes/libplasma.pxd              |   25 -
 python/pyarrow/io-hdfs.pxi                         |  470 ---
 python/pyarrow/io.pxi                              | 1896 ---------
 python/pyarrow/ipc.pxi                             |  968 -----
 python/pyarrow/ipc.py                              |  233 --
 python/pyarrow/json.py                             |   19 -
 python/pyarrow/jvm.py                              |  335 --
 python/pyarrow/lib.pxd                             |  597 ---
 python/pyarrow/lib.pyx                             |  158 -
 python/pyarrow/memory.pxi                          |  216 -
 python/pyarrow/orc.py                              |  149 -
 python/pyarrow/pandas-shim.pxi                     |  254 --
 python/pyarrow/pandas_compat.py                    | 1226 ------
 python/pyarrow/parquet.py                          | 2076 ---------
 python/pyarrow/plasma.py                           |  152 -
 python/pyarrow/public-api.pxi                      |  418 --
 python/pyarrow/scalar.pxi                          |  927 -----
 python/pyarrow/serialization.pxi                   |  556 ---
 python/pyarrow/serialization.py                    |  504 ---
 python/pyarrow/table.pxi                           | 2266 ----------
 python/pyarrow/tensor.pxi                          |  892 ----
 python/pyarrow/tensorflow/plasma_op.cc             |  391 --
 python/pyarrow/tests/__init__.py                   |    0
 python/pyarrow/tests/arrow_7980.py                 |   30 -
 python/pyarrow/tests/conftest.py                   |  277 --
 .../v0.17.0.version=2-compression=lz4.feather      |  Bin 594 -> 0 bytes
 python/pyarrow/tests/data/orc/README.md            |   22 -
 .../tests/data/orc/TestOrcFile.emptyFile.jsn.gz    |  Bin 50 -> 0 bytes
 .../tests/data/orc/TestOrcFile.emptyFile.orc       |  Bin 523 -> 0 bytes
 .../tests/data/orc/TestOrcFile.test1.jsn.gz        |  Bin 323 -> 0 bytes
 .../pyarrow/tests/data/orc/TestOrcFile.test1.orc   |  Bin 1711 -> 0 bytes
 .../tests/data/orc/TestOrcFile.testDate1900.jsn.gz |  Bin 182453 -> 0 bytes
 .../tests/data/orc/TestOrcFile.testDate1900.orc    |  Bin 30941 -> 0 bytes
 python/pyarrow/tests/data/orc/decimal.jsn.gz       |  Bin 19313 -> 0 bytes
 python/pyarrow/tests/data/orc/decimal.orc          |  Bin 16337 -> 0 bytes
 .../data/parquet/v0.7.1.all-named-index.parquet    |  Bin 3948 -> 0 bytes
 .../v0.7.1.column-metadata-handling.parquet        |  Bin 2012 -> 0 bytes
 python/pyarrow/tests/data/parquet/v0.7.1.parquet   |  Bin 4372 -> 0 bytes
 .../data/parquet/v0.7.1.some-named-index.parquet   |  Bin 4008 -> 0 bytes
 python/pyarrow/tests/deserialize_buffer.py         |   26 -
 python/pyarrow/tests/pandas_examples.py            |  172 -
 python/pyarrow/tests/pandas_threaded_import.py     |   44 -
 python/pyarrow/tests/parquet/common.py             |  177 -
 python/pyarrow/tests/parquet/conftest.py           |   87 -
 python/pyarrow/tests/parquet/test_basic.py         |  586 ---
 .../tests/parquet/test_compliant_nested_type.py    |  113 -
 python/pyarrow/tests/parquet/test_data_types.py    |  524 ---
 python/pyarrow/tests/parquet/test_dataset.py       | 1588 -------
 python/pyarrow/tests/parquet/test_datetime.py      |  373 --
 python/pyarrow/tests/parquet/test_metadata.py      |  477 ---
 python/pyarrow/tests/parquet/test_pandas.py        |  687 ---
 python/pyarrow/tests/parquet/test_parquet_file.py  |  258 --
 .../pyarrow/tests/parquet/test_parquet_writer.py   |  275 --
 python/pyarrow/tests/pyarrow_cython_example.pyx    |   55 -
 python/pyarrow/tests/strategies.py                 |  414 --
 python/pyarrow/tests/test_adhoc_memory_leak.py     |   43 -
 python/pyarrow/tests/test_array.py                 | 2680 ------------
 python/pyarrow/tests/test_builder.py               |   67 -
 python/pyarrow/tests/test_cffi.py                  |  295 --
 python/pyarrow/tests/test_compute.py               | 1243 ------
 python/pyarrow/tests/test_convert_builtin.py       | 2156 ----------
 python/pyarrow/tests/test_csv.py                   | 1345 ------
 python/pyarrow/tests/test_cuda.py                  |  792 ----
 python/pyarrow/tests/test_cuda_numba_interop.py    |  235 --
 python/pyarrow/tests/test_cython.py                |  143 -
 python/pyarrow/tests/test_dataset.py               | 3158 --------------
 python/pyarrow/tests/test_deprecations.py          |   23 -
 python/pyarrow/tests/test_extension_type.py        |  668 ---
 python/pyarrow/tests/test_feather.py               |  792 ----
 python/pyarrow/tests/test_filesystem.py            |   67 -
 python/pyarrow/tests/test_flight.py                | 1808 --------
 python/pyarrow/tests/test_fs.py                    | 1521 -------
 python/pyarrow/tests/test_gandiva.py               |  365 --
 python/pyarrow/tests/test_hdfs.py                  |  442 --
 python/pyarrow/tests/test_io.py                    | 1754 --------
 python/pyarrow/tests/test_ipc.py                   |  962 -----
 python/pyarrow/tests/test_json.py                  |  310 --
 python/pyarrow/tests/test_jvm.py                   |  433 --
 python/pyarrow/tests/test_memory.py                |  156 -
 python/pyarrow/tests/test_misc.py                  |  175 -
 python/pyarrow/tests/test_orc.py                   |  165 -
 python/pyarrow/tests/test_pandas.py                | 4383 --------------------
 python/pyarrow/tests/test_plasma.py                | 1073 -----
 python/pyarrow/tests/test_plasma_tf_op.py          |  104 -
 python/pyarrow/tests/test_scalars.py               |  625 ---
 python/pyarrow/tests/test_schema.py                |  721 ----
 python/pyarrow/tests/test_serialization.py         | 1233 ------
 .../pyarrow/tests/test_serialization_deprecated.py |   56 -
 python/pyarrow/tests/test_sparse_tensor.py         |  491 ---
 python/pyarrow/tests/test_strategies.py            |   61 -
 python/pyarrow/tests/test_table.py                 | 1687 --------
 python/pyarrow/tests/test_tensor.py                |  215 -
 python/pyarrow/tests/test_types.py                 | 1041 -----
 python/pyarrow/tests/util.py                       |  231 --
 python/pyarrow/types.pxi                           | 2781 -------------
 python/pyarrow/types.py                            |  357 --
 python/pyarrow/util.py                             |  152 -
 python/pyarrow/vendored/__init__.py                |   16 -
 python/pyarrow/vendored/version.py                 |  545 ---
 python/pyproject.toml                              |   26 -
 python/requirements-build.txt                      |    4 -
 python/requirements-test.txt                       |    7 -
 python/requirements-wheel-build.txt                |    6 -
 python/requirements-wheel-test.txt                 |   11 -
 python/scripts/test_imports.py                     |   21 -
 python/scripts/test_leak.py                        |  110 -
 python/setup.cfg                                   |   34 -
 python/setup.py                                    |  628 ---
 188 files changed, 87207 deletions(-)
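
For anyone who later needs the removed sources, the commit hash above is enough to recover them from history with standard git commands; a minimal sketch, assuming a local clone of the arrow-rs repository:

    # clone the repository and inspect the removal commit
    git clone https://gitbox.apache.org/repos/asf/arrow-rs.git
    cd arrow-rs
    git show --stat 4d14b301cda523d363e17ee5d03a581675915a32

    # restore the deleted python/ directory from the parent commit into the working tree
    git checkout 4d14b301cda523d363e17ee5d03a581675915a32^ -- python/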

diff --git a/python/.coveragerc b/python/.coveragerc
deleted file mode 100644
index f5dc6e3..0000000
--- a/python/.coveragerc
+++ /dev/null
@@ -1,19 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[run]
-plugins = Cython.Coverage
diff --git a/python/.flake8.cython b/python/.flake8.cython
deleted file mode 100644
index 4bc1958..0000000
--- a/python/.flake8.cython
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-[flake8]
-filename = *.pyx,*.pxd,*.pxi
-ignore = E211,E901,E999,E225,E226,E227,W504
diff --git a/python/.gitignore b/python/.gitignore
deleted file mode 100644
index ef1237a..0000000
--- a/python/.gitignore
+++ /dev/null
@@ -1,45 +0,0 @@
-thirdparty/
-CMakeFiles/
-CMakeCache.txt
-CTestTestfile.cmake
-Makefile
-cmake_install.cmake
-build/
-Testing/
-
-# Python stuff
-
-# Editor temporary/working/backup files
-*flymake*
-
-# Generated sources
-*.c
-*.cpp
-pyarrow/*_api.h
-pyarrow/_generated_version.py
-
-# Bundled headers
-pyarrow/include
-
-# setup.py working directory
-build
-# setup.py dist directory
-dist
-# Coverage
-.coverage
-coverage.xml
-htmlcov
-# Cache
-.cache
-
-# benchmark working dir
-.asv
-pyarrow/_table_api.h
-
-# manylinux temporary files
-manylinux1/arrow
-nm_arrow.log
-visible_symbols.log
-
-# plasma store
-pyarrow/plasma-store-server
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
deleted file mode 100644
index 0714aa4..0000000
--- a/python/CMakeLists.txt
+++ /dev/null
@@ -1,619 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# Includes code assembled from BSD/MIT/Apache-licensed code from some 3rd-party
-# projects, including Kudu, Impala, and libdynd. See python/LICENSE.txt
-
-cmake_minimum_required(VERSION 3.2)
-project(pyarrow)
-
-# Running from a Python sdist tarball
-set(LOCAL_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/cmake_modules")
-if(EXISTS "${LOCAL_CMAKE_MODULES}")
-  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${LOCAL_CMAKE_MODULES})
-endif()
-
-# Running from a git source tree
-set(CPP_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/../cpp/cmake_modules")
-if(EXISTS "${CPP_CMAKE_MODULES}")
-  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CPP_CMAKE_MODULES})
-endif()
-
-include(CMakeParseArguments)
-
-# Only interpret if() arguments as variables or keywords when unquoted.
-# https://www.cmake.org/cmake/help/latest/policy/CMP0054.html
-cmake_policy(SET CMP0054 NEW)
-
-# Use the first Python installation on PATH, not the newest one
-set(Python3_FIND_STRATEGY "LOCATION")
-# On Windows, use registry last, not first
-set(Python3_FIND_REGISTRY "LAST")
-# On macOS, use framework last, not first
-set(Python3_FIND_FRAMEWORK "LAST")
-
-# Allow "make install" to not depend on all targets.
-#
-# Must be declared in the top-level CMakeLists.txt.
-set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY true)
-
-set(CMAKE_MACOSX_RPATH 1)
-if(DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
-  set(CMAKE_OSX_DEPLOYMENT_TARGET $ENV{MACOSX_DEPLOYMENT_TARGET})
-else()
-  set(CMAKE_OSX_DEPLOYMENT_TARGET 10.9)
-endif()
-
-# Generate a Clang compile_commands.json "compilation database" file for use
-# with various development tools, such as Vim's YouCompleteMe plugin.
-# See http://clang.llvm.org/docs/JSONCompilationDatabase.html
-if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
-  set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
-endif()
-
-# Top level cmake dir
-if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
-  option(PYARROW_BUILD_CUDA "Build the PyArrow CUDA support" OFF)
-  option(PYARROW_BUILD_FLIGHT "Build the PyArrow Flight integration" OFF)
-  option(PYARROW_BUILD_DATASET "Build the PyArrow Dataset integration" OFF)
-  option(PYARROW_BUILD_GANDIVA "Build the PyArrow Gandiva integration" OFF)
-  option(PYARROW_BUILD_PARQUET "Build the PyArrow Parquet integration" OFF)
-  option(PYARROW_PARQUET_USE_SHARED "Rely on parquet shared libraries where relevant" ON)
-  option(PYARROW_BOOST_USE_SHARED
-         "Rely on boost shared libraries on linking static parquet" ON)
-  option(PYARROW_BUILD_PLASMA "Build the PyArrow Plasma integration" OFF)
-  option(PYARROW_USE_TENSORFLOW "Build PyArrow with TensorFlow support" OFF)
-  option(PYARROW_BUILD_ORC "Build the PyArrow ORC integration" OFF)
-  option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF)
-  option(PYARROW_BUNDLE_BOOST "Bundle the Boost libraries when we bundle Arrow C++" OFF)
-  option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF)
-  set(PYARROW_CXXFLAGS "" CACHE STRING "Compiler flags to append when compiling Arrow")
-endif()
-
-find_program(CCACHE_FOUND ccache)
-if(CCACHE_FOUND)
-  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
-  set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
-endif(CCACHE_FOUND)
-
-#
-# Compiler flags
-#
-
-include(BuildUtils)
-
-# Cython generated code emits way too many warnings at CHECKIN and EVERYTHING
-set(BUILD_WARNING_LEVEL "PRODUCTION")
-
-# This must be synchronized with the definition in
-# cpp/cmake_modules/DefineOptions.cmake.
-set(ARROW_ARMV8_ARCH
-    "armv8-a"
-    CACHE STRING "Arm64 arch and extensions: armv8-a, armv8-a or armv8-a+crc+crypto")
-include(SetupCxxFlags)
-
-# Add common flags
-set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}")
-
-if(MSVC)
-  # MSVC version of -Wno-return-type-c-linkage
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4190")
-
-  # Cython generates some bitshift expressions that MSVC does not like in
-  # __Pyx_PyFloat_DivideObjC
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4293")
-
-  # Converting to/from C++ bool is pretty wonky in Cython. The C4800 warning
-  # seems harmless, and probably not worth the effort of working around it
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4800")
-
-  # See https://github.com/cython/cython/issues/2731. Change introduced in
-  # Cython 0.29.1 causes "unsafe use of type 'bool' in operation"
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4804")
-else()
-  # Enable perf and other tools to work properly
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
-
-  # Suppress Cython warnings
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-maybe-uninitialized")
-
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
-     OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    # Cython warnings in clang
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constant-logical-operand")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-declarations")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sometimes-uninitialized")
-
-    # We have public Cython APIs which return C++ types, which are in an extern
-    # "C" block (no symbol mangling) and clang doesn't like this
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-type-c-linkage")
-  endif()
-endif()
-
-# For any C code, use the same flags.
-set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}")
-
-# Add C++-only flags, like -std=c++11
-set(CMAKE_CXX_FLAGS "${CXX_ONLY_FLAGS} ${CMAKE_CXX_FLAGS}")
-
-if(MSVC)
-  # MSVC makes its own output directories based on the build configuration
-  set(BUILD_SUBDIR_NAME "")
-else()
-  # Set compile output directory
-  string(TOLOWER ${CMAKE_BUILD_TYPE} BUILD_SUBDIR_NAME)
-endif()
-
-# If building in-source, create the latest symlink. If building out-of-source, which is
-# preferred, simply output the binaries in the build folder
-if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR})
-  set(BUILD_OUTPUT_ROOT_DIRECTORY
-      "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}")
-  # Link build/latest to the current build directory, to avoid developers
-  # accidentally running the latest debug build when in fact they're building
-  # release builds.
-  file(MAKE_DIRECTORY ${BUILD_OUTPUT_ROOT_DIRECTORY})
-  if(NOT APPLE)
-    set(MORE_ARGS "-T")
-  endif()
-  execute_process(COMMAND ln
-                          ${MORE_ARGS}
-                          -sf
-                          ${BUILD_OUTPUT_ROOT_DIRECTORY}
-                          ${CMAKE_CURRENT_BINARY_DIR}/build/latest)
-else()
-  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}")
-endif()
-
-message(STATUS "Generator: ${CMAKE_GENERATOR}")
-message(STATUS "Build output directory: ${BUILD_OUTPUT_ROOT_DIRECTORY}")
-
-# where to put generated archives (.a files)
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
-set(ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
-
-# where to put generated libraries (.so files)
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
-set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
-
-# where to put generated binaries
-set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
-
-if(PYARROW_USE_TENSORFLOW)
-  # TensorFlow uses the old GLIBCXX ABI, so we have to use it too
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
-endif()
-
-# Python and Numpy libraries
-find_package(Python3Alt REQUIRED)
-include(UseCython)
-
-include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
-
-#
-# Dependencies
-#
-
-if(PYARROW_BUILD_FLIGHT)
-  set(ARROW_FLIGHT TRUE)
-endif()
-
-# Arrow
-find_package(ArrowPython REQUIRED)
-include_directories(SYSTEM ${ARROW_INCLUDE_DIR})
-
-function(bundle_arrow_lib library_path)
-  set(options)
-  set(one_value_args SO_VERSION)
-  set(multi_value_args)
-  cmake_parse_arguments(ARG
-                        "${options}"
-                        "${one_value_args}"
-                        "${multi_value_args}"
-                        ${ARGN})
-  if(ARG_UNPARSED_ARGUMENTS)
-    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
-  endif()
-
-  get_filename_component(LIBRARY_DIR ${${library_path}} DIRECTORY)
-  get_filename_component(LIBRARY_NAME ${${library_path}} NAME_WE)
-
-  # Only copy the shared library with ABI version on Linux and macOS
-
-  if(MSVC)
-    configure_file(
-      ${${library_path}}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}
-      COPYONLY)
-  elseif(APPLE)
-    configure_file(
-      ${LIBRARY_DIR}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}
-      COPYONLY)
-  else()
-    configure_file(
-      ${${library_path}}.${ARG_SO_VERSION}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARG_SO_VERSION}
-      COPYONLY)
-  endif()
-
-endfunction(bundle_arrow_lib)
-
-function(bundle_arrow_import_lib library_path)
-  get_filename_component(LIBRARY_DIR ${${library_path}} DIRECTORY)
-  get_filename_component(LIBRARY_NAME ${${library_path}} NAME_WE)
-  configure_file(${${library_path}} ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}.lib
-                 COPYONLY)
-endfunction(bundle_arrow_import_lib)
-
-function(bundle_boost_lib library_path)
-  get_filename_component(LIBRARY_NAME ${${library_path}} NAME)
-  get_filename_component(LIBRARY_NAME_WE ${${library_path}} NAME_WE)
-  configure_file(${${library_path}} ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}
-                 COPYONLY)
-  set(Boost_SO_VERSION
-      "${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}")
-  if(APPLE)
-    configure_file(
-      ${${library_path}}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME_WE}${CMAKE_SHARED_LIBRARY_SUFFIX}
-      COPYONLY)
-  else()
-    configure_file(
-      ${${library_path}}
-      ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME_WE}${CMAKE_SHARED_LIBRARY_SUFFIX}.${Boost_SO_VERSION}
-      COPYONLY)
-  endif()
-endfunction()
-
-function(bundle_arrow_dependency library_name)
-  if(MSVC)
-    if(DEFINED ENV{CONDA_PREFIX})
-      file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}\\Library" SHARED_LIB_HOME)
-    endif()
-  else()
-    if(DEFINED ENV{CONDA_PREFIX})
-      file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}" SHARED_LIB_HOME)
-    endif()
-  endif()
-  if(DEFINED ENV{${library_name}_HOME})
-    file(TO_CMAKE_PATH "$ENV{${library_name}_HOME}" SHARED_LIB_HOME)
-  endif()
-  arrow_build_shared_library_name(shared_lib_name "${library_name}")
-  unset(SHARED_LIB_PATH CACHE)
-  if(MSVC)
-    set(CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL ${CMAKE_FIND_LIBRARY_SUFFIXES})
-    # .dll isn't found by find_library with MSVC because .dll isn't included in
-    # CMAKE_FIND_LIBRARY_SUFFIXES.
-    list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_SHARED_LIBRARY_SUFFIX}")
-  endif()
-  if(SHARED_LIB_HOME)
-    find_library(SHARED_LIB_PATH
-                 NAMES "${shared_lib_name}"
-                 PATHS "${SHARED_LIB_HOME}"
-                 PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES}
-                 NO_DEFAULT_PATH)
-  else()
-    find_library(SHARED_LIB_PATH
-                 NAMES "${shared_lib_name}"
-                 PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES})
-  endif()
-  if(MSVC)
-    set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL})
-  endif()
-  if(SHARED_LIB_PATH)
-    get_filename_component(SHARED_LIB_REALPATH ${SHARED_LIB_PATH} REALPATH)
-    get_filename_component(SHARED_LIB_NAME ${SHARED_LIB_PATH} NAME)
-    message(
-      STATUS
-        "Bundle dependency ${library_name}: ${SHARED_LIB_REALPATH} as ${SHARED_LIB_NAME}")
-    configure_file(${SHARED_LIB_REALPATH}
-                   ${BUILD_OUTPUT_ROOT_DIRECTORY}/${SHARED_LIB_NAME} COPYONLY)
-  else()
-    message(FATAL_ERROR "Unable to bundle dependency: ${library_name}")
-  endif()
-endfunction()
-
-# Always bundle includes
-file(COPY ${ARROW_INCLUDE_DIR}/arrow DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY}/include)
-
-if(PYARROW_BUNDLE_ARROW_CPP)
-  # arrow
-  bundle_arrow_lib(ARROW_SHARED_LIB SO_VERSION ${ARROW_SO_VERSION})
-  bundle_arrow_lib(ARROW_PYTHON_SHARED_LIB SO_VERSION ${ARROW_SO_VERSION})
-
-  # boost
-  if(PYARROW_BOOST_USE_SHARED AND PYARROW_BUNDLE_BOOST)
-    set(Boost_USE_STATIC_LIBS OFF)
-    set(Boost_USE_MULTITHREADED ON)
-    if(MSVC AND ARROW_USE_STATIC_CRT)
-      set(Boost_USE_STATIC_RUNTIME ON)
-    endif()
-    set(Boost_ADDITIONAL_VERSIONS
-        "1.66.0"
-        "1.66"
-        "1.65.0"
-        "1.65"
-        "1.64.0"
-        "1.64"
-        "1.63.0"
-        "1.63"
-        "1.62.0"
-        "1.61"
-        "1.61.0"
-        "1.62"
-        "1.60.0"
-        "1.60")
-    list(GET Boost_ADDITIONAL_VERSIONS 0 BOOST_LATEST_VERSION)
-    string(REPLACE "." "_" BOOST_LATEST_VERSION_IN_PATH ${BOOST_LATEST_VERSION})
-    if(MSVC)
-      # disable autolinking in boost
-      add_definitions(-DBOOST_ALL_NO_LIB)
-    endif()
-    find_package(Boost COMPONENTS regex REQUIRED)
-    bundle_boost_lib(Boost_REGEX_LIBRARY)
-  endif()
-
-  if(MSVC)
-    # TODO(kszucs): locate msvcp140.dll in a portable fashion and bundle it
-    bundle_arrow_import_lib(ARROW_IMPORT_LIB)
-    bundle_arrow_import_lib(ARROW_PYTHON_IMPORT_LIB)
-  endif()
-endif()
-
-#
-# Subdirectories
-#
-
-if(UNIX)
-  set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
-endif()
-
-set(CYTHON_EXTENSIONS
-    lib
-    _fs
-    _compute
-    _csv
-    _json)
-
-set(LINK_LIBS arrow_shared arrow_python_shared)
-
-if(PYARROW_BUILD_S3)
-  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} _s3fs)
-endif()
-
-if(PYARROW_BUILD_HDFS)
-  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} _hdfs)
-endif()
-
-if(PYARROW_BUILD_CUDA)
-  # Arrow CUDA
-  find_package(ArrowCUDA REQUIRED)
-
-  if(PYARROW_BUNDLE_ARROW_CPP)
-    bundle_arrow_lib(ARROW_CUDA_SHARED_LIB SO_VERSION ${ARROW_SO_VERSION})
-    if(MSVC)
-      bundle_arrow_import_lib(ARROW_CUDA_IMPORT_LIB)
-    endif()
-  endif()
-  set(CUDA_LINK_LIBS arrow_cuda_shared)
-  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} _cuda)
-endif()
-
-# Dataset
-if(PYARROW_BUILD_DATASET)
-  # Arrow Dataset
-  find_package(ArrowDataset REQUIRED)
-
-  if(PYARROW_BUNDLE_ARROW_CPP)
-    bundle_arrow_lib(ARROW_DATASET_SHARED_LIB SO_VERSION ${ARROW_SO_VERSION})
-    if(MSVC)
-      bundle_arrow_import_lib(ARROW_DATASET_IMPORT_LIB)
-    endif()
-  endif()
-
-  set(DATASET_LINK_LIBS arrow_dataset_shared)
-  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} _dataset)
-endif()
-
-if(PYARROW_BUILD_PARQUET)
-  # Parquet
-  find_package(Parquet REQUIRED)
-
-  include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
-
-  if(PYARROW_BUNDLE_ARROW_CPP)
-    file(COPY ${PARQUET_INCLUDE_DIR}/parquet
-         DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY}/include)
-  endif()
-
-  if(PYARROW_PARQUET_USE_SHARED)
-    if(PYARROW_BUNDLE_ARROW_CPP)
-      bundle_arrow_lib(PARQUET_SHARED_LIB SO_VERSION ${PARQUET_SO_VERSION})
-      if(MSVC)
-        bundle_arrow_import_lib(PARQUET_IMPORT_LIB)
-      endif()
-    endif()
-    set(PARQUET_LINK_LIBS parquet_shared)
-  else()
-    find_package(Thrift)
-    if(PYARROW_BOOST_USE_SHARED)
-      set(Boost_USE_STATIC_LIBS OFF)
-    else()
-      set(Boost_USE_STATIC_LIBS ON)
-    endif()
-    find_package(Boost COMPONENTS regex REQUIRED)
-    add_thirdparty_lib(boost_regex STATIC_LIB ${Boost_REGEX_LIBRARY_RELEASE})
-    add_thirdparty_lib(thrift STATIC_LIB ${THRIFT_STATIC_LIB})
-    set(PARQUET_LINK_LIBS parquet_static thrift_static boost_regex_static)
-  endif()
-  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} _parquet)
-endif()
-
-# Plasma
-if(PYARROW_BUILD_PLASMA)
-  find_package(Plasma REQUIRED)
-
-  include_directories(SYSTEM ${PLASMA_INCLUDE_DIR})
-
-  file(COPY ${ARROW_INCLUDE_DIR}/plasma
-       DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY}/include)
-
-  if(PYARROW_BUNDLE_ARROW_CPP)
-    bundle_arrow_lib(PLASMA_SHARED_LIB SO_VERSION ${PLASMA_SO_VERSION})
-  endif()
-  set(PLASMA_LINK_LIBS plasma_shared)
-  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} _plasma)
-  file(COPY ${PLASMA_STORE_SERVER} DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY})
-endif()
-
-if(PYARROW_BUILD_ORC)
-  # ORC
-  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} _orc)
-endif()
-
-# Flight
-if(PYARROW_BUILD_FLIGHT)
-  # Arrow Flight
-  find_package(ArrowPythonFlight REQUIRED)
-
-  if(PYARROW_BUNDLE_ARROW_CPP)
-    bundle_arrow_lib(ARROW_FLIGHT_SHARED_LIB SO_VERSION ${ARROW_SO_VERSION})
-    bundle_arrow_lib(ARROW_PYTHON_FLIGHT_SHARED_LIB SO_VERSION ${ARROW_SO_VERSION})
-    if(MSVC)
-      bundle_arrow_import_lib(ARROW_FLIGHT_IMPORT_LIB)
-      bundle_arrow_import_lib(ARROW_PYTHON_FLIGHT_IMPORT_LIB)
-      # XXX Hardcoded library names because CMake is too stupid to give us
-      # the shared library paths.
-      # https://gitlab.kitware.com/cmake/cmake/issues/16210
-      # bundle_arrow_dependency(libcrypto-1_1-x64)
-      # bundle_arrow_dependency(libssl-1_1-x64)
-    endif()
-  endif()
-
-  set(FLIGHT_LINK_LIBS arrow_flight_shared arrow_python_flight_shared)
-  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} _flight)
-endif()
-
-# Gandiva
-if(PYARROW_BUILD_GANDIVA)
-  find_package(Gandiva REQUIRED)
-
-  include_directories(SYSTEM ${GANDIVA_INCLUDE_DIR})
-
-  if(PYARROW_BUNDLE_ARROW_CPP)
-    file(COPY ${GANDIVA_INCLUDE_DIR}/gandiva
-         DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY}/include)
-
-    bundle_arrow_lib(GANDIVA_SHARED_LIB SO_VERSION ${ARROW_SO_VERSION})
-
-    if(MSVC)
-      bundle_arrow_import_lib(GANDIVA_IMPORT_LIB)
-    endif()
-  endif()
-
-  set(GANDIVA_LINK_LIBS gandiva_shared)
-  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} gandiva)
-endif()
-
-#
-# Setup and build Cython modules
-#
-
-if(PYARROW_GENERATE_COVERAGE)
-  set(CYTHON_FLAGS "${CYTHON_FLAGS}" "-Xlinetrace=True")
-endif()
-
-foreach(module ${CYTHON_EXTENSIONS})
-  string(REPLACE "." ";" directories ${module})
-  list(GET directories -1 module_name)
-  list(REMOVE_AT directories -1)
-
-  string(REPLACE "." "/" module_root "${module}")
-  set(module_SRC pyarrow/${module_root}.pyx)
-  set_source_files_properties(${module_SRC} PROPERTIES CYTHON_IS_CXX 1)
-
-  cython_add_module(${module_name} ${module_name}_pyx ${module_name}_output ${module_SRC})
-
-  if(directories)
-    string(REPLACE ";" "/" module_output_directory ${directories})
-    set_target_properties(${module_name}
-                          PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${module_output_directory})
-  endif()
-
-  if(PYARROW_BUNDLE_ARROW_CPP)
-    # In the event that we are bundling the shared libraries (e.g. in a
-    # manylinux1 wheel), we need to set the RPATH of the extensions to the
-    # root of the pyarrow/ package so that libarrow/libarrow_python are able
-    # to be loaded properly
-    if(APPLE)
-      set(module_install_rpath "@loader_path/")
-    else()
-      set(module_install_rpath "\$ORIGIN")
-    endif()
-
-    # XXX(wesm): ARROW-2326 this logic is only needed when we have Cython
-    # modules in interior directories. Since all of our C extensions and
-    # bundled libraries are in the same place, we can skip this part
-
-    # list(LENGTH directories i)
-    # while(${i} GREATER 0)
-    #   set(module_install_rpath "${module_install_rpath}/..")
-    #   math(EXPR i "${i} - 1" )
-    # endwhile(${i} GREATER 0)
-
-    set_target_properties(${module_name} PROPERTIES INSTALL_RPATH ${module_install_rpath})
-  endif()
-
-  if(PYARROW_GENERATE_COVERAGE)
-    set_target_properties(${module_name}
-                          PROPERTIES COMPILE_DEFINITIONS
-                                     "CYTHON_TRACE=1;CYTHON_TRACE_NOGIL=1")
-  endif()
-
-  target_link_libraries(${module_name} PRIVATE ${LINK_LIBS})
-
-  # Generated files will be moved to the right directory by setup.py.
-endforeach(module)
-
-# Additional link libraries
-
-if(PYARROW_BUILD_CUDA)
-  target_link_libraries(_cuda PRIVATE ${CUDA_LINK_LIBS})
-endif()
-
-if(PYARROW_BUILD_FLIGHT)
-  target_link_libraries(_flight PRIVATE ${FLIGHT_LINK_LIBS})
-endif()
-
-if(PYARROW_BUILD_DATASET)
-  target_link_libraries(_dataset PRIVATE ${DATASET_LINK_LIBS})
-endif()
-
-if(PYARROW_BUILD_GANDIVA)
-  target_link_libraries(gandiva PRIVATE ${GANDIVA_LINK_LIBS})
-endif()
-
-if(PYARROW_BUILD_PARQUET)
-  target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS})
-endif()
-
-if(PYARROW_BUILD_PLASMA)
-  target_link_libraries(_plasma PRIVATE ${PLASMA_LINK_LIBS})
-endif()
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
deleted file mode 100644
index ed7012e..0000000
--- a/python/MANIFEST.in
+++ /dev/null
@@ -1,15 +0,0 @@
-include README.md
-include ../LICENSE.txt
-include ../NOTICE.txt
-
-global-include CMakeLists.txt
-graft pyarrow
-graft cmake_modules
-
-global-exclude *.so
-global-exclude *.pyc
-global-exclude *~
-global-exclude \#*
-global-exclude .git*
-global-exclude .DS_Store
-prune .asv
diff --git a/python/README.md b/python/README.md
deleted file mode 100644
index def98a3..0000000
--- a/python/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-## Python library for Apache Arrow
-
-[![pypi](https://img.shields.io/pypi/v/pyarrow.svg)](https://pypi.org/project/pyarrow/) [![conda-forge](https://img.shields.io/conda/vn/conda-forge/pyarrow.svg)](https://anaconda.org/conda-forge/pyarrow)
-
-This library provides a Python API for functionality provided by the Arrow C++
-libraries, along with tools for Arrow integration and interoperability with
-pandas, NumPy, and other software in the Python ecosystem.
-
-## Installing
-
-Across platforms, you can install a recent version of pyarrow with the conda
-package manager:
-
-```shell
-conda install pyarrow -c conda-forge
-```
-
-On Linux, macOS, and Windows, you can also install binary wheels from PyPI with
-pip:
-
-```shell
-pip install pyarrow
-```
-
-If you encounter any issues importing the pip wheels on Windows, you may need
-to install the [Visual C++ Redistributable for Visual Studio 2015][6].
-
-## Development
-
-See [Python Development][2] in the documentation subproject.
-
-### Building the documentation
-
-See [documentation build instructions][1] in the documentation subproject.
-
-[1]: https://github.com/apache/arrow/blob/master/docs/source/developers/documentation.rst
-[2]: https://github.com/apache/arrow/blob/master/docs/source/developers/python.rst
-[3]: https://github.com/pandas-dev/pandas
-[5]: https://arrow.apache.org/docs/latest/python/benchmarks.html
-[6]: https://www.microsoft.com/en-us/download/details.aspx?id=48145
\ No newline at end of file
diff --git a/python/asv-build.sh b/python/asv-build.sh
deleted file mode 100755
index 7de5ff4..0000000
--- a/python/asv-build.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-
-# ASV doesn't activate its conda environment for us
-if [ -z "$ASV_ENV_DIR" ]; then exit 1; fi
-
-if [ -z "$CONDA_HOME" ]; then
-  echo "Please set \$CONDA_HOME to point to your root conda installation"
-  exit 1;
-fi
-
-eval "$($CONDA_HOME/bin/conda shell.bash hook)"
-
-conda activate $ASV_ENV_DIR
-echo "== Conda Prefix for benchmarks: " $CONDA_PREFIX " =="
-
-# Build Arrow C++ libraries
-export ARROW_HOME=$CONDA_PREFIX
-export PARQUET_HOME=$CONDA_PREFIX
-export ORC_HOME=$CONDA_PREFIX
-export PROTOBUF_HOME=$CONDA_PREFIX
-export BOOST_ROOT=$CONDA_PREFIX
-
-pushd ../cpp
-mkdir -p build
-pushd build
-
-cmake -GNinja \
-      -DCMAKE_BUILD_TYPE=release \
-      -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-      -DARROW_CXXFLAGS=$CXXFLAGS \
-      -DARROW_USE_GLOG=off \
-      -DARROW_FLIGHT=on \
-      -DARROW_ORC=on \
-      -DARROW_PARQUET=on \
-      -DARROW_PYTHON=on \
-      -DARROW_PLASMA=on \
-      -DARROW_S3=on \
-      -DARROW_BUILD_TESTS=off \
-      ..
-cmake --build . --target install
-
-popd
-popd
-
-# Build pyarrow wrappers
-export SETUPTOOLS_SCM_PRETEND_VERSION=0.0.1
-export PYARROW_BUILD_TYPE=release
-export PYARROW_PARALLEL=8
-export PYARROW_WITH_FLIGHT=1
-export PYARROW_WITH_ORC=1
-export PYARROW_WITH_PARQUET=1
-export PYARROW_WITH_PLASMA=1
-
-python setup.py clean
-find pyarrow -name "*.so" -delete
-python setup.py develop
diff --git a/python/asv-install.sh b/python/asv-install.sh
deleted file mode 100755
index beef730..0000000
--- a/python/asv-install.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Deliberately empty, but exists so that we don't have to change
-# asv.conf.json if we need specific commands here.
diff --git a/python/asv-uninstall.sh b/python/asv-uninstall.sh
deleted file mode 100755
index beef730..0000000
--- a/python/asv-uninstall.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Deliberately empty, but exists so that we don't have to change
-# asv.conf.json if we need specific commands here.
diff --git a/python/asv.conf.json b/python/asv.conf.json
deleted file mode 100644
index cdb178c..0000000
--- a/python/asv.conf.json
+++ /dev/null
@@ -1,187 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-{
-    // The version of the config file format.  Do not change, unless
-    // you know what you are doing.
-    "version": 1,
-
-    // The name of the project being benchmarked
-    "project": "pyarrow",
-
-    // The project's homepage
-    "project_url": "https://arrow.apache.org/",
-
-    // The URL or local path of the source code repository for the
-    // project being benchmarked
-    "repo": "..",
-
-    // The Python project's subdirectory in your repo.  If missing or
-    // the empty string, the project is assumed to be located at the root
-    // of the repository.
-    "repo_subdir": "python",
-
-    // Custom build commands for Arrow.
-    "build_command": ["/bin/bash {build_dir}/asv-build.sh"],
-    "install_command": ["/bin/bash {build_dir}/asv-install.sh"],
-    "uninstall_command": ["/bin/bash {build_dir}/asv-uninstall.sh"],
-
-    // List of branches to benchmark. If not provided, defaults to "master"
-    // (for git) or "default" (for mercurial).
-    // "branches": ["master"], // for git
-    // "branches": ["default"],    // for mercurial
-
-    // The DVCS being used.  If not set, it will be automatically
-    // determined from "repo" by looking at the protocol in the URL
-    // (if remote), or by looking for special directories, such as
-    // ".git" (if local).
-    "dvcs": "git",
-
-    // The tool to use to create environments.  May be "conda",
-    // "virtualenv" or other value depending on the plugins in use.
-    // If missing or the empty string, the tool will be automatically
-    // determined by looking for tools on the PATH environment
-    // variable.
-    "environment_type": "conda",
-    // Avoid conda-forge to avoid C++ ABI issues
-    "conda_channels": ["defaults"],
-
-    // the base URL to show a commit for the project.
-    "show_commit_url": "https://github.com/apache/arrow/commit/",
-
-    // The Pythons you'd like to test against.  If not provided, defaults
-    // to the current version of Python used to run `asv`.
-    "pythons": ["3.7"],
-
-    // The matrix of dependencies to test.  Each key is the name of a
-    // package (in PyPI) and the values are version numbers.  An empty
-    // list or empty string indicates to just test against the default
-    // (latest) version. null indicates that the package is to not be
-    // installed. If the package to be tested is only available from
-    // PyPi, and the 'environment_type' is conda, then you can preface
-    // the package name by 'pip+', and the package will be installed via
-    // pip (with all the conda available packages installed first,
-    // followed by the pip installed packages).
-    //
-    // "matrix": {
-    //     "numpy": ["1.6", "1.7"],
-    //     "six": ["", null],        // test with and without six installed
-    //     "pip+emcee": [""],   // emcee is only available for install with pip.
-    // },
-    "matrix": {
-        // Use older boost since it works on more editions of the project
-        "aws-sdk-cpp": [],
-        "boost-cpp": ["1.68.0"],
-        "brotli": [],
-        "cmake": [],
-        "cython": [],
-        "flatbuffers": [],
-        "grpc-cpp": [],
-        "libprotobuf": [],
-        "lz4-c": [],
-        "ninja": [],
-        "numpy": [],
-        "pandas": ["0.25.1"],
-        "pip+setuptools_scm": [],
-        "rapidjson": [],
-        "re2": [],
-        "snappy": [],
-        "thrift-cpp": [],
-        "zstd": [],
-    },
-
-    // Combinations of libraries/python versions can be excluded/included
-    // from the set to test. Each entry is a dictionary containing additional
-    // key-value pairs to include/exclude.
-    //
-    // An exclude entry excludes entries where all values match. The
-    // values are regexps that should match the whole string.
-    //
-    // An include entry adds an environment. Only the packages listed
-    // are installed. The 'python' key is required. The exclude rules
-    // do not apply to includes.
-    //
-    // In addition to package names, the following keys are available:
-    //
-    // - python
-    //     Python version, as in the *pythons* variable above.
-    // - environment_type
-    //     Environment type, as above.
-    // - sys_platform
-    //     Platform, as in sys.platform. Possible values for the common
-    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
-    //
-    // "exclude": [
-    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
-    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
-    // ],
-    //
-    // "include": [
-    //     // additional env for python2.7
-    //     {"python": "2.7", "numpy": "1.8"},
-    //     // additional env if run on windows+conda
-    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
-    // ],
-
-    // The directory (relative to the current directory) that benchmarks are
-    // stored in.  If not provided, defaults to "benchmarks"
-    "benchmark_dir": "benchmarks",
-
-    // The directory (relative to the current directory) to cache the Python
-    // environments in.  If not provided, defaults to "env"
-    "env_dir": ".asv/env",
-
-    // The directory (relative to the current directory) that raw benchmark
-    // results are stored in.  If not provided, defaults to "results".
-    "results_dir": ".asv/results",
-
-    // The directory (relative to the current directory) that the html tree
-    // should be written to.  If not provided, defaults to "html".
-    "html_dir": "build/benchmarks/html",
-
-    // The number of characters to retain in the commit hashes.
-    // "hash_length": 8,
-
-    // `asv` will cache wheels of the recent builds in each
-    // environment, making them faster to install next time.  This is
-    // number of builds to keep, per environment.
-    // "wheel_cache_size": 0,
-
-    // The commits after which the regression search in `asv publish`
-    // should start looking for regressions. Dictionary whose keys are
-    // regexps matching to benchmark names, and values corresponding to
-    // the commit (exclusive) after which to start looking for
-    // regressions.  The default is to start from the first commit
-    // with results. If the commit is `null`, regression detection is
-    // skipped for the matching benchmark.
-    //
-    // "regressions_first_commits": {
-    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
-    //    "another_benchmark": null,   // Skip regression detection altogether
-    // }
-
-    // The thresholds for relative change in results, after which `asv
-    // publish` starts reporting regressions. Dictionary of the same
-    // form as in ``regressions_first_commits``, with values
-    // indicating the thresholds.  If multiple entries match, the
-    // maximum is taken. If no entry matches, the default is 5%.
-    //
-    // "regressions_thresholds": {
-    //    "some_benchmark": 0.01,     // Threshold of 1%
-    //    "another_benchmark": 0.5,   // Threshold of 50%
-    // }
-}
diff --git a/python/benchmarks/__init__.py b/python/benchmarks/__init__.py
deleted file mode 100644
index 13a8339..0000000
--- a/python/benchmarks/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/python/benchmarks/array_ops.py b/python/benchmarks/array_ops.py
deleted file mode 100644
index 696b171..0000000
--- a/python/benchmarks/array_ops.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import pyarrow as pa
-
-
-class ScalarAccess(object):
-    n = 10 ** 5
-
-    def setUp(self):
-        self._array = pa.array(list(range(self.n)), type=pa.int64())
-        self._array_items = list(self._array)
-
-    def time_getitem(self):
-        for i in range(self.n):
-            self._array[i]
-
-    def time_as_py(self):
-        for item in self._array_items:
-            item.as_py()
diff --git a/python/benchmarks/common.py b/python/benchmarks/common.py
deleted file mode 100644
index 48526a4..0000000
--- a/python/benchmarks/common.py
+++ /dev/null
@@ -1,349 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import codecs
-import decimal
-from functools import partial
-import itertools
-import sys
-import unicodedata
-
-import numpy as np
-
-import pyarrow as pa
-
-
-KILOBYTE = 1 << 10
-MEGABYTE = KILOBYTE * KILOBYTE
-
-DEFAULT_NONE_PROB = 0.3
-
-
-def _multiplicate_sequence(base, target_size):
-    q, r = divmod(target_size, len(base))
-    return [base] * q + [base[:r]]
-
-
-def get_random_bytes(n, seed=42):
-    """
-    Generate a random bytes object of size *n*.
-    Note the result might be compressible.
-    """
-    rnd = np.random.RandomState(seed)
-    # Computing a huge random bytestring can be costly, so we get at most
-    # 100KB and duplicate the result as needed
-    base_size = 100003
-    q, r = divmod(n, base_size)
-    if q == 0:
-        result = rnd.bytes(r)
-    else:
-        base = rnd.bytes(base_size)
-        result = b''.join(_multiplicate_sequence(base, n))
-    assert len(result) == n
-    return result
-
-
-def get_random_ascii(n, seed=42):
-    """
-    Get a random ASCII-only unicode string of size *n*.
-    """
-    arr = np.frombuffer(get_random_bytes(n, seed=seed), dtype=np.int8) & 0x7f
-    result, _ = codecs.ascii_decode(arr)
-    assert isinstance(result, str)
-    assert len(result) == n
-    return result
-
-
-def _random_unicode_letters(n, seed=42):
-    """
-    Generate a list of random unicode letters (slow).
-    """
-    def _get_more_candidates():
-        return rnd.randint(0, sys.maxunicode, size=n).tolist()
-
-    rnd = np.random.RandomState(seed)
-    out = []
-    candidates = []
-
-    while len(out) < n:
-        if not candidates:
-            candidates = _get_more_candidates()
-        ch = chr(candidates.pop())
-        # XXX Do we actually care that the code points are valid?
-        if unicodedata.category(ch)[0] == 'L':
-            out.append(ch)
-    return out
-
-
-_1024_random_unicode_letters = _random_unicode_letters(1024)
-
-
-def get_random_unicode(n, seed=42):
-    """
-    Get a random non-ASCII unicode string of size *n*.
-    """
-    indices = np.frombuffer(get_random_bytes(n * 2, seed=seed),
-                            dtype=np.int16) & 1023
-    unicode_arr = np.array(_1024_random_unicode_letters)[indices]
-
-    result = ''.join(unicode_arr.tolist())
-    assert len(result) == n, (len(result), len(unicode_arr))
-    return result
-
-
-class BuiltinsGenerator(object):
-
-    def __init__(self, seed=42):
-        self.rnd = np.random.RandomState(seed)
-
-    def sprinkle(self, lst, prob, value):
-        """
-        Sprinkle *value* entries in list *lst* with likelihood *prob*.
-        """
-        for i, p in enumerate(self.rnd.random_sample(size=len(lst))):
-            if p < prob:
-                lst[i] = value
-
-    def sprinkle_nones(self, lst, prob):
-        """
-        Sprinkle None entries in list *lst* with likelihood *prob*.
-        """
-        self.sprinkle(lst, prob, None)
-
-    def generate_int_list(self, n, none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of Python ints with *none_prob* probability of
-        an entry being None.
-        """
-        data = list(range(n))
-        self.sprinkle_nones(data, none_prob)
-        return data
-
-    def generate_float_list(self, n, none_prob=DEFAULT_NONE_PROB,
-                            use_nan=False):
-        """
-        Generate a list of Python floats with *none_prob* probability of
-        an entry being None (or NaN if *use_nan* is true).
-        """
-        # Make sure we get Python floats, not np.float64
-        data = list(map(float, self.rnd.uniform(0.0, 1.0, n)))
-        assert len(data) == n
-        self.sprinkle(data, none_prob, value=float('nan') if use_nan else None)
-        return data
-
-    def generate_bool_list(self, n, none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of Python bools with *none_prob* probability of
-        an entry being None.
-        """
-        # Make sure we get Python bools, not np.bool_
-        data = [bool(x >= 0.5) for x in self.rnd.uniform(0.0, 1.0, n)]
-        assert len(data) == n
-        self.sprinkle_nones(data, none_prob)
-        return data
-
-    def generate_decimal_list(self, n, none_prob=DEFAULT_NONE_PROB,
-                              use_nan=False):
-        """
-        Generate a list of Python Decimals with *none_prob* probability of
-        an entry being None (or NaN if *use_nan* is true).
-        """
-        data = [decimal.Decimal('%.9f' % f)
-                for f in self.rnd.uniform(0.0, 1.0, n)]
-        assert len(data) == n
-        self.sprinkle(data, none_prob,
-                      value=decimal.Decimal('nan') if use_nan else None)
-        return data
-
-    def generate_object_list(self, n, none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of generic Python objects with *none_prob*
-        probability of an entry being None.
-        """
-        data = [object() for i in range(n)]
-        self.sprinkle_nones(data, none_prob)
-        return data
-
-    def _generate_varying_sequences(self, random_factory, n, min_size,
-                                    max_size, none_prob):
-        """
-        Generate a list of *n* sequences of varying size between *min_size*
-        and *max_size*, with *none_prob* probability of an entry being None.
-        The base material for each sequence is obtained by calling
-        `random_factory(<some size>)`
-        """
-        base_size = 10000
-        base = random_factory(base_size + max_size)
-        data = []
-        for i in range(n):
-            off = self.rnd.randint(base_size)
-            if min_size == max_size:
-                size = min_size
-            else:
-                size = self.rnd.randint(min_size, max_size + 1)
-            data.append(base[off:off + size])
-        self.sprinkle_nones(data, none_prob)
-        assert len(data) == n
-        return data
-
-    def generate_fixed_binary_list(self, n, size, none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of bytestrings with a fixed *size*.
-        """
-        return self._generate_varying_sequences(get_random_bytes, n,
-                                                size, size, none_prob)
-
-    def generate_varying_binary_list(self, n, min_size, max_size,
-                                     none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of bytestrings with a random size between
-        *min_size* and *max_size*.
-        """
-        return self._generate_varying_sequences(get_random_bytes, n,
-                                                min_size, max_size, none_prob)
-
-    def generate_ascii_string_list(self, n, min_size, max_size,
-                                   none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of ASCII strings with a random size between
-        *min_size* and *max_size*.
-        """
-        return self._generate_varying_sequences(get_random_ascii, n,
-                                                min_size, max_size, none_prob)
-
-    def generate_unicode_string_list(self, n, min_size, max_size,
-                                     none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of unicode strings with a random size between
-        *min_size* and *max_size*.
-        """
-        return self._generate_varying_sequences(get_random_unicode, n,
-                                                min_size, max_size, none_prob)
-
-    def generate_int_list_list(self, n, min_size, max_size,
-                               none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of lists of Python ints with a random size between
-        *min_size* and *max_size*.
-        """
-        return self._generate_varying_sequences(
-            partial(self.generate_int_list, none_prob=none_prob),
-            n, min_size, max_size, none_prob)
-
-    def generate_tuple_list(self, n, none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of tuples with random values.
-        Each tuple has the form `(int value, float value, bool value)`
-        """
-        dicts = self.generate_dict_list(n, none_prob=none_prob)
-        tuples = [(d.get('u'), d.get('v'), d.get('w'))
-                  if d is not None else None
-                  for d in dicts]
-        assert len(tuples) == n
-        return tuples
-
-    def generate_dict_list(self, n, none_prob=DEFAULT_NONE_PROB):
-        """
-        Generate a list of dicts with random values.
-        Each dict has the form
-
-            `{'u': int value, 'v': float value, 'w': bool value}`
-        """
-        ints = self.generate_int_list(n, none_prob=none_prob)
-        floats = self.generate_float_list(n, none_prob=none_prob)
-        bools = self.generate_bool_list(n, none_prob=none_prob)
-        dicts = []
-        # Keep half the Nones, omit the other half
-        keep_nones = itertools.cycle([True, False])
-        for u, v, w in zip(ints, floats, bools):
-            d = {}
-            if u is not None or next(keep_nones):
-                d['u'] = u
-            if v is not None or next(keep_nones):
-                d['v'] = v
-            if w is not None or next(keep_nones):
-                d['w'] = w
-            dicts.append(d)
-        self.sprinkle_nones(dicts, none_prob)
-        assert len(dicts) == n
-        return dicts
-
-    def get_type_and_builtins(self, n, type_name):
-        """
-        Return a `(arrow type, list)` tuple where the arrow type
-        corresponds to the given logical *type_name*, and the list
-        is a list of *n* random-generated Python objects compatible
-        with the arrow type.
-        """
-        size = None
-
-        if type_name in ('bool', 'decimal', 'ascii', 'unicode', 'int64 list'):
-            kind = type_name
-        elif type_name.startswith(('int', 'uint')):
-            kind = 'int'
-        elif type_name.startswith('float'):
-            kind = 'float'
-        elif type_name.startswith('struct'):
-            kind = 'struct'
-        elif type_name == 'binary':
-            kind = 'varying binary'
-        elif type_name.startswith('binary'):
-            kind = 'fixed binary'
-            size = int(type_name[6:])
-            assert size > 0
-        else:
-            raise ValueError("unrecognized type %r" % (type_name,))
-
-        if kind in ('int', 'float'):
-            ty = getattr(pa, type_name)()
-        elif kind == 'bool':
-            ty = pa.bool_()
-        elif kind == 'decimal':
-            ty = pa.decimal128(9, 9)
-        elif kind == 'fixed binary':
-            ty = pa.binary(size)
-        elif kind == 'varying binary':
-            ty = pa.binary()
-        elif kind in ('ascii', 'unicode'):
-            ty = pa.string()
-        elif kind == 'int64 list':
-            ty = pa.list_(pa.int64())
-        elif kind == 'struct':
-            ty = pa.struct([pa.field('u', pa.int64()),
-                            pa.field('v', pa.float64()),
-                            pa.field('w', pa.bool_())])
-
-        factories = {
-            'int': self.generate_int_list,
-            'float': self.generate_float_list,
-            'bool': self.generate_bool_list,
-            'decimal': self.generate_decimal_list,
-            'fixed binary': partial(self.generate_fixed_binary_list,
-                                    size=size),
-            'varying binary': partial(self.generate_varying_binary_list,
-                                      min_size=3, max_size=40),
-            'ascii': partial(self.generate_ascii_string_list,
-                             min_size=3, max_size=40),
-            'unicode': partial(self.generate_unicode_string_list,
-                               min_size=3, max_size=40),
-            'int64 list': partial(self.generate_int_list_list,
-                                  min_size=0, max_size=20),
-            'struct': self.generate_dict_list,
-            'struct from tuples': self.generate_tuple_list,
-        }
-        data = factories[kind](n)
-        return ty, data
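For reference, `get_type_and_builtins` above returns an `(arrow type, list of Python objects)` pair that the conversion benchmarks feed straight into `pa.array`. A minimal sketch of that round trip, assuming pyarrow is installed and the `common.py` module shown above is importable as `common`:

```
import pyarrow as pa

from common import BuiltinsGenerator  # the module shown above

gen = BuiltinsGenerator(seed=42)
ty, data = gen.get_type_and_builtins(1000, 'int64')  # ints with ~30% Nones
arr = pa.array(data, type=ty)                        # Python objects -> Arrow
assert arr.type == ty
roundtripped = arr.to_pylist()                       # Arrow -> Python objects
assert len(roundtripped) == len(data)
```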
diff --git a/python/benchmarks/convert_builtins.py b/python/benchmarks/convert_builtins.py
deleted file mode 100644
index 48a38fa..0000000
--- a/python/benchmarks/convert_builtins.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import pyarrow as pa
-
-from . import common
-
-
-# TODO:
-# - test dates and times
-
-
-class ConvertPyListToArray(object):
-    """
-    Benchmark pa.array(list of values, type=...)
-    """
-    size = 10 ** 5
-    types = ('int32', 'uint32', 'int64', 'uint64',
-             'float32', 'float64', 'bool', 'decimal',
-             'binary', 'binary10', 'ascii', 'unicode',
-             'int64 list', 'struct', 'struct from tuples')
-
-    param_names = ['type']
-    params = [types]
-
-    def setup(self, type_name):
-        gen = common.BuiltinsGenerator()
-        self.ty, self.data = gen.get_type_and_builtins(self.size, type_name)
-
-    def time_convert(self, *args):
-        pa.array(self.data, type=self.ty)
-
-
-class InferPyListToArray(object):
-    """
-    Benchmark pa.array(list of values) with type inference
-    """
-    size = 10 ** 5
-    types = ('int64', 'float64', 'bool', 'decimal', 'binary', 'ascii',
-             'unicode', 'int64 list', 'struct')
-
-    param_names = ['type']
-    params = [types]
-
-    def setup(self, type_name):
-        gen = common.BuiltinsGenerator()
-        self.ty, self.data = gen.get_type_and_builtins(self.size, type_name)
-
-    def time_infer(self, *args):
-        arr = pa.array(self.data)
-        assert arr.type == self.ty
-
-
-class ConvertArrayToPyList(object):
-    """
-    Benchmark pa.array.to_pylist()
-    """
-    size = 10 ** 5
-    types = ('int32', 'uint32', 'int64', 'uint64',
-             'float32', 'float64', 'bool', 'decimal',
-             'binary', 'binary10', 'ascii', 'unicode',
-             'int64 list', 'struct')
-
-    param_names = ['type']
-    params = [types]
-
-    def setup(self, type_name):
-        gen = common.BuiltinsGenerator()
-        self.ty, self.data = gen.get_type_and_builtins(self.size, type_name)
-        self.arr = pa.array(self.data, type=self.ty)
-
-    def time_convert(self, *args):
-        self.arr.to_pylist()
diff --git a/python/benchmarks/convert_pandas.py b/python/benchmarks/convert_pandas.py
deleted file mode 100644
index 9cf6bde..0000000
--- a/python/benchmarks/convert_pandas.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-import pandas as pd
-import pandas.util.testing as tm
-
-import pyarrow as pa
-
-
-class PandasConversionsBase(object):
-    def setup(self, n, dtype):
-        if dtype == 'float64_nans':
-            arr = np.arange(n).astype('float64')
-            arr[arr % 10 == 0] = np.nan
-        else:
-            arr = np.arange(n).astype(dtype)
-        self.data = pd.DataFrame({'column': arr})
-
-
-class PandasConversionsToArrow(PandasConversionsBase):
-    param_names = ('size', 'dtype')
-    params = ((10, 10 ** 6), ('int64', 'float64', 'float64_nans', 'str'))
-
-    def time_from_series(self, n, dtype):
-        pa.Table.from_pandas(self.data)
-
-
-class PandasConversionsFromArrow(PandasConversionsBase):
-    param_names = ('size', 'dtype')
-    params = ((10, 10 ** 6), ('int64', 'float64', 'float64_nans', 'str'))
-
-    def setup(self, n, dtype):
-        super(PandasConversionsFromArrow, self).setup(n, dtype)
-        self.arrow_data = pa.Table.from_pandas(self.data)
-
-    def time_to_series(self, n, dtype):
-        self.arrow_data.to_pandas()
-
-
-class ToPandasStrings(object):
-
-    param_names = ('uniqueness', 'total')
-    params = ((0.001, 0.01, 0.1, 0.5), (1000000,))
-    string_length = 25
-
-    def setup(self, uniqueness, total):
-        nunique = int(total * uniqueness)
-        unique_values = [tm.rands(self.string_length) for i in range(nunique)]
-        values = unique_values * (total // nunique)
-        self.arr = pa.array(values, type=pa.string())
-        self.table = pa.Table.from_arrays([self.arr], ['f0'])
-
-    def time_to_pandas_dedup(self, *args):
-        self.arr.to_pandas()
-
-    def time_to_pandas_no_dedup(self, *args):
-        self.arr.to_pandas(deduplicate_objects=False)
-
-
-class ZeroCopyPandasRead(object):
-
-    def setup(self):
-        # Transpose to make column-major
-        values = np.random.randn(10, 100000)
-
-        df = pd.DataFrame(values.T)
-        ctx = pa.default_serialization_context()
-
-        self.serialized = ctx.serialize(df)
-        self.as_buffer = self.serialized.to_buffer()
-        self.as_components = self.serialized.to_components()
-
-    def time_deserialize_from_buffer(self):
-        pa.deserialize(self.as_buffer)
-
-    def time_deserialize_from_components(self):
-        pa.deserialize_components(self.as_components)
-
-
-class SerializeDeserializePandas(object):
-
-    def setup(self):
-        # 10 million length
-        n = 10000000
-        self.df = pd.DataFrame({'data': np.random.randn(n)})
-        self.serialized = pa.serialize_pandas(self.df)
-
-    def time_serialize_pandas(self):
-        pa.serialize_pandas(self.df)
-
-    def time_deserialize_pandas(self):
-        pa.deserialize_pandas(self.serialized)
-
-
-class TableFromPandasMicroperformance(object):
-    # ARROW-4629
-
-    def setup(self):
-        ser = pd.Series(range(10000))
-        df = pd.DataFrame({col: ser.copy(deep=True) for col in range(100)})
-        # Simulate a real dataset by converting some columns to strings
-        self.df = df.astype({col: str for col in range(50)})
-
-    def time_Table_from_pandas(self):
-        for _ in range(50):
-            pa.Table.from_pandas(self.df, nthreads=1)
diff --git a/python/benchmarks/io.py b/python/benchmarks/io.py
deleted file mode 100644
index 01a9acb..0000000
--- a/python/benchmarks/io.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import time
-import pyarrow as pa
-
-
-class HighLatencyReader(object):
-
-    def __init__(self, raw, latency):
-        self.raw = raw
-        self.latency = latency
-
-    def close(self):
-        self.raw.close()
-
-    @property
-    def closed(self):
-        return self.raw.closed
-
-    def read(self, nbytes=None):
-        time.sleep(self.latency)
-        return self.raw.read(nbytes)
-
-
-class HighLatencyWriter(object):
-
-    def __init__(self, raw, latency):
-        self.raw = raw
-        self.latency = latency
-
-    def close(self):
-        self.raw.close()
-
-    @property
-    def closed(self):
-        return self.raw.closed
-
-    def write(self, data):
-        time.sleep(self.latency)
-        self.raw.write(data)
-
-
-class BufferedIOHighLatency(object):
-    """Benchmark creating a parquet manifest."""
-
-    increment = 1024
-    total_size = 16 * (1 << 20)  # 16 MB
-    buffer_size = 1 << 20  # 1 MB
-    latency = 0.1  # 100ms
-
-    param_names = ('latency',)
-    params = [0, 0.01, 0.1]
-
-    def time_buffered_writes(self, latency):
-        test_data = b'x' * self.increment
-        bytes_written = 0
-        out = pa.BufferOutputStream()
-        slow_out = HighLatencyWriter(out, latency)
-        buffered_out = pa.output_stream(slow_out, buffer_size=self.buffer_size)
-
-        while bytes_written < self.total_size:
-            buffered_out.write(test_data)
-            bytes_written += self.increment
-        buffered_out.flush()
-
-    def time_buffered_reads(self, latency):
-        bytes_read = 0
-        reader = pa.input_stream(pa.py_buffer(b'x' * self.total_size))
-        slow_reader = HighLatencyReader(reader, latency)
-        buffered_reader = pa.input_stream(slow_reader,
-                                          buffer_size=self.buffer_size)
-        while bytes_read < self.total_size:
-            buffered_reader.read(self.increment)
-            bytes_read += self.increment
diff --git a/python/benchmarks/microbenchmarks.py b/python/benchmarks/microbenchmarks.py
deleted file mode 100644
index f8ba383..0000000
--- a/python/benchmarks/microbenchmarks.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import pyarrow.benchmark as pb
-
-from . import common
-
-
-class PandasObjectIsNull(object):
-    size = 10 ** 5
-    types = ('int', 'float', 'object', 'decimal')
-
-    param_names = ['type']
-    params = [types]
-
-    def setup(self, type_name):
-        gen = common.BuiltinsGenerator()
-        if type_name == 'int':
-            lst = gen.generate_int_list(self.size)
-        elif type_name == 'float':
-            lst = gen.generate_float_list(self.size, use_nan=True)
-        elif type_name == 'object':
-            lst = gen.generate_object_list(self.size)
-        elif type_name == 'decimal':
-            lst = gen.generate_decimal_list(self.size)
-        else:
-            assert 0
-        self.lst = lst
-
-    def time_PandasObjectIsNull(self, *args):
-        pb.benchmark_PandasObjectIsNull(self.lst)
diff --git a/python/benchmarks/parquet.py b/python/benchmarks/parquet.py
deleted file mode 100644
index 3aeca42..0000000
--- a/python/benchmarks/parquet.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import shutil
-import tempfile
-
-from pandas.util.testing import rands
-import numpy as np
-import pandas as pd
-
-import pyarrow as pa
-try:
-    import pyarrow.parquet as pq
-except ImportError:
-    pq = None
-
-
-class ParquetManifestCreation(object):
-    """Benchmark creating a parquet manifest."""
-
-    size = 10 ** 6
-    tmpdir = None
-
-    param_names = ('num_partitions', 'num_threads')
-    params = [(10, 100, 1000), (1, 8)]
-
-    def setup(self, num_partitions, num_threads):
-        if pq is None:
-            raise NotImplementedError("Parquet support not enabled")
-
-        self.tmpdir = tempfile.mkdtemp('benchmark_parquet')
-        rnd = np.random.RandomState(42)
-        num1 = rnd.randint(0, num_partitions, size=self.size)
-        num2 = rnd.randint(0, 1000, size=self.size)
-        output_df = pd.DataFrame({'num1': num1, 'num2': num2})
-        output_table = pa.Table.from_pandas(output_df)
-        pq.write_to_dataset(output_table, self.tmpdir, ['num1'])
-
-    def teardown(self, num_partitions, num_threads):
-        if self.tmpdir is not None:
-            shutil.rmtree(self.tmpdir)
-
-    def time_manifest_creation(self, num_partitions, num_threads):
-        pq.ParquetManifest(self.tmpdir, metadata_nthreads=num_threads)
-
-
-class ParquetWriteBinary(object):
-
-    def setup(self):
-        nuniques = 100000
-        value_size = 50
-        length = 1000000
-        num_cols = 10
-
-        unique_values = np.array([rands(value_size) for
-                                  i in range(nuniques)], dtype='O')
-        values = unique_values[np.random.randint(0, nuniques, size=length)]
-        self.table = pa.table([pa.array(values) for i in range(num_cols)],
-                              names=['f{}'.format(i) for i in range(num_cols)])
-        self.table_df = self.table.to_pandas()
-
-    def time_write_binary_table(self):
-        out = pa.BufferOutputStream()
-        pq.write_table(self.table, out)
-
-    def time_write_binary_table_uncompressed(self):
-        out = pa.BufferOutputStream()
-        pq.write_table(self.table, out, compression='none')
-
-    def time_write_binary_table_no_dictionary(self):
-        out = pa.BufferOutputStream()
-        pq.write_table(self.table, out, use_dictionary=False)
-
-    def time_convert_pandas_and_write_binary_table(self):
-        out = pa.BufferOutputStream()
-        pq.write_table(pa.table(self.table_df), out)
-
-
-def generate_dict_strings(string_size, nunique, length, random_order=True):
-    uniques = np.array([rands(string_size) for i in range(nunique)], dtype='O')
-    if random_order:
-        indices = np.random.randint(0, nunique, size=length).astype('i4')
-    else:
-        indices = np.arange(nunique).astype('i4').repeat(length // nunique)
-    return pa.DictionaryArray.from_arrays(indices, uniques)
-
-
-def generate_dict_table(num_cols, string_size, nunique, length,
-                        random_order=True):
-    data = generate_dict_strings(string_size, nunique, length,
-                                 random_order=random_order)
-    return pa.table([
-        data for i in range(num_cols)
-    ], names=['f{}'.format(i) for i in range(num_cols)])
-
-
-class ParquetWriteDictionaries(object):
-
-    param_names = ('nunique',)
-    params = [(1000), (100000)]
-
-    def setup(self, nunique):
-        self.num_cols = 10
-        self.value_size = 32
-        self.nunique = nunique
-        self.length = 10000000
-
-        self.table = generate_dict_table(self.num_cols, self.value_size,
-                                         self.nunique, self.length)
-        self.table_sequential = generate_dict_table(self.num_cols,
-                                                    self.value_size,
-                                                    self.nunique, self.length,
-                                                    random_order=False)
-
-    def time_write_random_order(self, nunique):
-        pq.write_table(self.table, pa.BufferOutputStream())
-
-    def time_write_sequential(self, nunique):
-        pq.write_table(self.table_sequential, pa.BufferOutputStream())
-
-
-class ParquetManyColumns(object):
-
-    total_cells = 10000000
-    param_names = ('num_cols',)
-    params = [100, 1000, 10000]
-
-    def setup(self, num_cols):
-        num_rows = self.total_cells // num_cols
-        self.table = pa.table({'c' + str(i): np.random.randn(num_rows)
-                               for i in range(num_cols)})
-
-        out = pa.BufferOutputStream()
-        pq.write_table(self.table, out)
-        self.buf = out.getvalue()
-
-    def time_write(self, num_cols):
-        out = pa.BufferOutputStream()
-        pq.write_table(self.table, out)
-
-    def time_read(self, num_cols):
-        pq.read_table(self.buf)
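For reference, the dictionary-writing benchmarks above boil down to the following pattern, shown here on a tiny table as a sketch (assuming pyarrow is built with Parquet support; the column values are made up for illustration):

```
import pyarrow as pa
import pyarrow.parquet as pq

# A small dictionary-encoded column: int32 indices into unique strings.
indices = pa.array([0, 1, 0, 2], type=pa.int32())
dictionary = pa.array(['apple', 'banana', 'cherry'])
table = pa.table({'f0': pa.DictionaryArray.from_arrays(indices, dictionary)})

sink = pa.BufferOutputStream()
pq.write_table(table, sink)                # what time_write_random_order times
restored = pq.read_table(sink.getvalue())  # what time_read times
assert restored.num_rows == table.num_rows
```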
diff --git a/python/benchmarks/plasma.py b/python/benchmarks/plasma.py
deleted file mode 100644
index 90a2845..0000000
--- a/python/benchmarks/plasma.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-import timeit
-
-try:
-    import pyarrow.plasma as plasma
-except ImportError:
-    # TODO(wesm): These are not asv benchmarks, so we can just fail
-    # silently here
-    pass
-
-
-class SimplePlasmaThroughput(object):
-    """Benchmark plasma store throughput with a single client."""
-
-    params = [1000, 100000, 10000000]
-
-    timer = timeit.default_timer
-
-    def setup(self, size):
-        self.plasma_store_ctx = plasma.start_plasma_store(
-            plasma_store_memory=10**9)
-        plasma_store_name, p = self.plasma_store_ctx.__enter__()
-        self.plasma_client = plasma.connect(plasma_store_name)
-
-        self.data = np.random.randn(size // 8)
-
-    def teardown(self, size):
-        self.plasma_store_ctx.__exit__(None, None, None)
-
-    def time_plasma_put_data(self, size):
-        self.plasma_client.put(self.data)
-
-
-class SimplePlasmaLatency(object):
-    """Benchmark plasma store latency with a single client."""
-
-    timer = timeit.default_timer
-
-    def setup(self):
-        self.plasma_store_ctx = plasma.start_plasma_store(
-            plasma_store_memory=10**9)
-        plasma_store_name, p = self.plasma_store_ctx.__enter__()
-        self.plasma_client = plasma.connect(plasma_store_name)
-
-    def teardown(self):
-        self.plasma_store_ctx.__exit__(None, None, None)
-
-    def time_plasma_put(self):
-        for i in range(1000):
-            self.plasma_client.put(1)
-
-    def time_plasma_putget(self):
-        for i in range(1000):
-            x = self.plasma_client.put(1)
-            self.plasma_client.get(x)
diff --git a/python/benchmarks/streaming.py b/python/benchmarks/streaming.py
deleted file mode 100644
index c0c63e6..0000000
--- a/python/benchmarks/streaming.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-import pandas as pd
-import pyarrow as pa
-
-from . import common
-from .common import KILOBYTE, MEGABYTE
-
-
-def generate_chunks(total_size, nchunks, ncols, dtype=np.dtype('int64')):
-    rowsize = total_size // nchunks // ncols
-    assert rowsize % dtype.itemsize == 0
-
-    def make_column(col, chunk):
-        return np.frombuffer(common.get_random_bytes(
-            rowsize, seed=col + 997 * chunk)).view(dtype)
-
-    return [pd.DataFrame({
-            'c' + str(col): make_column(col, chunk)
-            for col in range(ncols)})
-            for chunk in range(nchunks)]
-
-
-class StreamReader(object):
-    """
-    Benchmark in-memory streaming to a Pandas dataframe.
-    """
-    total_size = 64 * MEGABYTE
-    ncols = 8
-    chunk_sizes = [16 * KILOBYTE, 256 * KILOBYTE, 8 * MEGABYTE]
-
-    param_names = ['chunk_size']
-    params = [chunk_sizes]
-
-    def setup(self, chunk_size):
-        # Note we're careful to stream different chunks instead of
-        # streaming N times the same chunk, so that we avoid operating
-        # entirely out of L1/L2.
-        chunks = generate_chunks(self.total_size,
-                                 nchunks=self.total_size // chunk_size,
-                                 ncols=self.ncols)
-        batches = [pa.RecordBatch.from_pandas(df)
-                   for df in chunks]
-        schema = batches[0].schema
-        sink = pa.BufferOutputStream()
-        stream_writer = pa.RecordBatchStreamWriter(sink, schema)
-        for batch in batches:
-            stream_writer.write_batch(batch)
-        self.source = sink.getvalue()
-
-    def time_read_to_dataframe(self, *args):
-        reader = pa.RecordBatchStreamReader(self.source)
-        table = reader.read_all()
-        df = table.to_pandas()  # noqa
diff --git a/python/cmake_modules b/python/cmake_modules
deleted file mode 120000
index 76e2a8d..0000000
--- a/python/cmake_modules
+++ /dev/null
@@ -1 +0,0 @@
-../cpp/cmake_modules
\ No newline at end of file
diff --git a/python/examples/flight/client.py b/python/examples/flight/client.py
deleted file mode 100644
index ed6ce54..0000000
--- a/python/examples/flight/client.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""An example Flight CLI client."""
-
-import argparse
-import sys
-
-import pyarrow
-import pyarrow.flight
-import pyarrow.csv as csv
-
-
-def list_flights(args, client, connection_args={}):
-    print('Flights\n=======')
-    for flight in client.list_flights():
-        descriptor = flight.descriptor
-        if descriptor.descriptor_type == pyarrow.flight.DescriptorType.PATH:
-            print("Path:", descriptor.path)
-        elif descriptor.descriptor_type == pyarrow.flight.DescriptorType.CMD:
-            print("Command:", descriptor.command)
-        else:
-            print("Unknown descriptor type")
-
-        print("Total records:", end=" ")
-        if flight.total_records >= 0:
-            print(flight.total_records)
-        else:
-            print("Unknown")
-
-        print("Total bytes:", end=" ")
-        if flight.total_bytes >= 0:
-            print(flight.total_bytes)
-        else:
-            print("Unknown")
-
-        print("Number of endpoints:", len(flight.endpoints))
-        print("Schema:")
-        print(flight.schema)
-        print('---')
-
-    print('\nActions\n=======')
-    for action in client.list_actions():
-        print("Type:", action.type)
-        print("Description:", action.description)
-        print('---')
-
-
-def do_action(args, client, connection_args={}):
-    try:
-        buf = pyarrow.allocate_buffer(0)
-        action = pyarrow.flight.Action(args.action_type, buf)
-        print('Running action', args.action_type)
-        for result in client.do_action(action):
-            print("Got result", result.body.to_pybytes())
-    except pyarrow.lib.ArrowIOError as e:
-        print("Error calling action:", e)
-
-
-def push_data(args, client, connection_args={}):
-    print('File Name:', args.file)
-    my_table = csv.read_csv(args.file)
-    print('Table rows=', str(len(my_table)))
-    df = my_table.to_pandas()
-    print(df.head())
-    writer, _ = client.do_put(
-        pyarrow.flight.FlightDescriptor.for_path(args.file), my_table.schema)
-    writer.write_table(my_table)
-    writer.close()
-
-
-def get_flight(args, client, connection_args={}):
-    if args.path:
-        descriptor = pyarrow.flight.FlightDescriptor.for_path(*args.path)
-    else:
-        descriptor = pyarrow.flight.FlightDescriptor.for_command(args.command)
-
-    info = client.get_flight_info(descriptor)
-    for endpoint in info.endpoints:
-        print('Ticket:', endpoint.ticket)
-        for location in endpoint.locations:
-            print(location)
-            get_client = pyarrow.flight.FlightClient(location,
-                                                     **connection_args)
-            reader = get_client.do_get(endpoint.ticket)
-            df = reader.read_pandas()
-            print(df)
-
-
-def _add_common_arguments(parser):
-    parser.add_argument('--tls', action='store_true',
-                        help='Enable transport-level security')
-    parser.add_argument('--tls-roots', default=None,
-                        help='Path to trusted TLS certificate(s)')
-    parser.add_argument("--mtls", nargs=2, default=None,
-                        metavar=('CERTFILE', 'KEYFILE'),
-                        help="Enable mutual TLS (client certificate and key)")
-    parser.add_argument('host', type=str,
-                        help="Address or hostname to connect to")
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    subcommands = parser.add_subparsers()
-
-    cmd_list = subcommands.add_parser('list')
-    cmd_list.set_defaults(action='list')
-    _add_common_arguments(cmd_list)
-    cmd_list.add_argument('-l', '--list', action='store_true',
-                          help="Print more details.")
-
-    cmd_do = subcommands.add_parser('do')
-    cmd_do.set_defaults(action='do')
-    _add_common_arguments(cmd_do)
-    cmd_do.add_argument('action_type', type=str,
-                        help="The action type to run.")
-
-    cmd_put = subcommands.add_parser('put')
-    cmd_put.set_defaults(action='put')
-    _add_common_arguments(cmd_put)
-    cmd_put.add_argument('file', type=str,
-                         help="CSV file to upload.")
-
-    cmd_get = subcommands.add_parser('get')
-    cmd_get.set_defaults(action='get')
-    _add_common_arguments(cmd_get)
-    cmd_get_descriptor = cmd_get.add_mutually_exclusive_group(required=True)
-    cmd_get_descriptor.add_argument('-p', '--path', type=str, action='append',
-                                    help="The path for the descriptor.")
-    cmd_get_descriptor.add_argument('-c', '--command', type=str,
-                                    help="The command for the descriptor.")
-
-    args = parser.parse_args()
-    if not hasattr(args, 'action'):
-        parser.print_help()
-        sys.exit(1)
-
-    commands = {
-        'list': list_flights,
-        'do': do_action,
-        'get': get_flight,
-        'put': push_data,
-    }
-    host, port = args.host.split(':')
-    port = int(port)
-    scheme = "grpc+tcp"
-    connection_args = {}
-    if args.tls:
-        scheme = "grpc+tls"
-        if args.tls_roots:
-            with open(args.tls_roots, "rb") as root_certs:
-                connection_args["tls_root_certs"] = root_certs.read()
-    if args.mtls:
-        with open(args.mtls[0], "rb") as cert_file:
-            tls_cert_chain = cert_file.read()
-        with open(args.mtls[1], "rb") as key_file:
-            tls_private_key = key_file.read()
-        connection_args["cert_chain"] = tls_cert_chain
-        connection_args["private_key"] = tls_private_key
-    client = pyarrow.flight.FlightClient(f"{scheme}://{host}:{port}",
-                                         **connection_args)
-    while True:
-        try:
-            action = pyarrow.flight.Action("healthcheck", b"")
-            options = pyarrow.flight.FlightCallOptions(timeout=1)
-            list(client.do_action(action, options=options))
-            break
-        except pyarrow.ArrowIOError as e:
-            if "Deadline" in str(e):
-                print("Server is not ready, waiting...")
-    commands[args.action](args, client, connection_args)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/python/examples/flight/middleware.py b/python/examples/flight/middleware.py
deleted file mode 100644
index 2056bae..0000000
--- a/python/examples/flight/middleware.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Example of invisibly propagating a request ID with middleware."""
-
-import argparse
-import sys
-import threading
-import uuid
-
-import pyarrow as pa
-import pyarrow.flight as flight
-
-
-class TraceContext:
-    _locals = threading.local()
-    _locals.trace_id = None
-
-    @classmethod
-    def current_trace_id(cls):
-        if not getattr(cls._locals, "trace_id", None):
-            cls.set_trace_id(uuid.uuid4().hex)
-        return cls._locals.trace_id
-
-    @classmethod
-    def set_trace_id(cls, trace_id):
-        cls._locals.trace_id = trace_id
-
-
-TRACE_HEADER = "x-tracing-id"
-
-
-class TracingServerMiddleware(flight.ServerMiddleware):
-    def __init__(self, trace_id):
-        self.trace_id = trace_id
-
-    def sending_headers(self):
-        return {
-            TRACE_HEADER: self.trace_id,
-        }
-
-
-class TracingServerMiddlewareFactory(flight.ServerMiddlewareFactory):
-    def start_call(self, info, headers):
-        print("Starting new call:", info)
-        if TRACE_HEADER in headers:
-            trace_id = headers[TRACE_HEADER][0]
-            print("Found trace header with value:", trace_id)
-            TraceContext.set_trace_id(trace_id)
-        return TracingServerMiddleware(TraceContext.current_trace_id())
-
-
-class TracingClientMiddleware(flight.ClientMiddleware):
-    def sending_headers(self):
-        print("Sending trace ID:", TraceContext.current_trace_id())
-        return {
-            "x-tracing-id": TraceContext.current_trace_id(),
-        }
-
-    def received_headers(self, headers):
-        if TRACE_HEADER in headers:
-            trace_id = headers[TRACE_HEADER][0]
-            print("Found trace header with value:", trace_id)
-            # Don't overwrite our trace ID
-
-
-class TracingClientMiddlewareFactory(flight.ClientMiddlewareFactory):
-    def start_call(self, info):
-        print("Starting new call:", info)
-        return TracingClientMiddleware()
-
-
-class FlightServer(flight.FlightServerBase):
-    def __init__(self, delegate, **kwargs):
-        super().__init__(**kwargs)
-        if delegate:
-            self.delegate = flight.connect(
-                delegate,
-                middleware=(TracingClientMiddlewareFactory(),))
-        else:
-            self.delegate = None
-
-    def list_actions(self, context):
-        return [
-            ("get-trace-id", "Get the trace context ID."),
-        ]
-
-    def do_action(self, context, action):
-        trace_middleware = context.get_middleware("trace")
-        if trace_middleware:
-            TraceContext.set_trace_id(trace_middleware.trace_id)
-        if action.type == "get-trace-id":
-            if self.delegate:
-                for result in self.delegate.do_action(action):
-                    yield result
-            else:
-                trace_id = TraceContext.current_trace_id().encode("utf-8")
-                print("Returning trace ID:", trace_id)
-                buf = pa.py_buffer(trace_id)
-                yield pa.flight.Result(buf)
-        else:
-            raise KeyError(f"Unknown action {action.type!r}")
-
-
-def main():
-    parser = argparse.ArgumentParser()
-
-    subparsers = parser.add_subparsers(dest="command")
-    client = subparsers.add_parser("client", help="Run the client.")
-    client.add_argument("server")
-    client.add_argument("--request-id", default=None)
-
-    server = subparsers.add_parser("server", help="Run the server.")
-    server.add_argument(
-        "--listen",
-        required=True,
-        help="The location to listen on (example: grpc://localhost:5050)",
-    )
-    server.add_argument(
-        "--delegate",
-        required=False,
-        default=None,
-        help=("A location to delegate to. That is, this server will "
-              "simply call the given server for the response. Demonstrates "
-              "propagation of the trace ID between servers."),
-    )
-
-    args = parser.parse_args()
-    if not getattr(args, "command"):
-        parser.print_help()
-        return 1
-
-    if args.command == "server":
-        server = FlightServer(
-            args.delegate,
-            location=args.listen,
-            middleware={"trace": TracingServerMiddlewareFactory()})
-        server.serve()
-    elif args.command == "client":
-        client = flight.connect(
-            args.server,
-            middleware=(TracingClientMiddlewareFactory(),))
-        if args.request_id:
-            TraceContext.set_trace_id(args.request_id)
-        else:
-            TraceContext.set_trace_id("client-chosen-id")
-
-        for result in client.do_action(flight.Action("get-trace-id", b"")):
-            print(result.body.to_pybytes())
-
-
-if __name__ == "__main__":
-    sys.exit(main() or 0)
diff --git a/python/examples/flight/server.py b/python/examples/flight/server.py
deleted file mode 100644
index 7a6b669..0000000
--- a/python/examples/flight/server.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""An example Flight Python server."""
-
-import argparse
-import ast
-import threading
-import time
-
-import pyarrow
-import pyarrow.flight
-
-
-class FlightServer(pyarrow.flight.FlightServerBase):
-    def __init__(self, host="localhost", location=None,
-                 tls_certificates=None, verify_client=False,
-                 root_certificates=None, auth_handler=None):
-        super(FlightServer, self).__init__(
-            location, auth_handler, tls_certificates, verify_client,
-            root_certificates)
-        self.flights = {}
-        self.host = host
-        self.tls_certificates = tls_certificates
-
-    @classmethod
-    def descriptor_to_key(self, descriptor):
-        return (descriptor.descriptor_type.value, descriptor.command,
-                tuple(descriptor.path or tuple()))
-
-    def _make_flight_info(self, key, descriptor, table):
-        if self.tls_certificates:
-            location = pyarrow.flight.Location.for_grpc_tls(
-                self.host, self.port)
-        else:
-            location = pyarrow.flight.Location.for_grpc_tcp(
-                self.host, self.port)
-        endpoints = [pyarrow.flight.FlightEndpoint(repr(key), [location]), ]
-
-        mock_sink = pyarrow.MockOutputStream()
-        stream_writer = pyarrow.RecordBatchStreamWriter(
-            mock_sink, table.schema)
-        stream_writer.write_table(table)
-        stream_writer.close()
-        data_size = mock_sink.size()
-
-        return pyarrow.flight.FlightInfo(table.schema,
-                                         descriptor, endpoints,
-                                         table.num_rows, data_size)
-
-    def list_flights(self, context, criteria):
-        for key, table in self.flights.items():
-            if key[1] is not None:
-                descriptor = \
-                    pyarrow.flight.FlightDescriptor.for_command(key[1])
-            else:
-                descriptor = pyarrow.flight.FlightDescriptor.for_path(*key[2])
-
-            yield self._make_flight_info(key, descriptor, table)
-
-    def get_flight_info(self, context, descriptor):
-        key = FlightServer.descriptor_to_key(descriptor)
-        if key in self.flights:
-            table = self.flights[key]
-            return self._make_flight_info(key, descriptor, table)
-        raise KeyError('Flight not found.')
-
-    def do_put(self, context, descriptor, reader, writer):
-        key = FlightServer.descriptor_to_key(descriptor)
-        print(key)
-        self.flights[key] = reader.read_all()
-        print(self.flights[key])
-
-    def do_get(self, context, ticket):
-        key = ast.literal_eval(ticket.ticket.decode())
-        if key not in self.flights:
-            return None
-        return pyarrow.flight.RecordBatchStream(self.flights[key])
-
-    def list_actions(self, context):
-        return [
-            ("clear", "Clear the stored flights."),
-            ("shutdown", "Shut down this server."),
-        ]
-
-    def do_action(self, context, action):
-        if action.type == "clear":
-            raise NotImplementedError(
-                "{} is not implemented.".format(action.type))
-        elif action.type == "healthcheck":
-            pass
-        elif action.type == "shutdown":
-            yield pyarrow.flight.Result(pyarrow.py_buffer(b'Shutdown!'))
-            # Shut down on background thread to avoid blocking current
-            # request
-            threading.Thread(target=self._shutdown).start()
-        else:
-            raise KeyError("Unknown action {!r}".format(action.type))
-
-    def _shutdown(self):
-        """Shut down after a delay."""
-        print("Server is shutting down...")
-        time.sleep(2)
-        self.shutdown()
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--host", type=str, default="localhost",
-                        help="Address or hostname to listen on")
-    parser.add_argument("--port", type=int, default=5005,
-                        help="Port number to listen on")
-    parser.add_argument("--tls", nargs=2, default=None,
-                        metavar=('CERTFILE', 'KEYFILE'),
-                        help="Enable transport-level security")
-    parser.add_argument("--verify_client", type=bool, default=False,
-                        help="enable mutual TLS and verify the client if True")
-
-    args = parser.parse_args()
-    tls_certificates = []
-    scheme = "grpc+tcp"
-    if args.tls:
-        scheme = "grpc+tls"
-        with open(args.tls[0], "rb") as cert_file:
-            tls_cert_chain = cert_file.read()
-        with open(args.tls[1], "rb") as key_file:
-            tls_private_key = key_file.read()
-        tls_certificates.append((tls_cert_chain, tls_private_key))
-
-    location = "{}://{}:{}".format(scheme, args.host, args.port)
-
-    server = FlightServer(args.host, location,
-                          tls_certificates=tls_certificates,
-                          verify_client=args.verify_client)
-    print("Serving on", location)
-    server.serve()
-
-
-if __name__ == '__main__':
-    main()
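For reference, the client and server examples above normally run as separate processes, but the same put/get flow can be exercised in one process. A minimal sketch, assuming pyarrow is built with Flight support, the `server.py` shown above is importable as `server`, port 5005 is free, and the descriptor path "example" is an arbitrary illustrative name:

```
import threading

import pyarrow as pa
import pyarrow.flight as flight

from server import FlightServer  # example class shown above

location = "grpc+tcp://localhost:5005"
srv = FlightServer("localhost", location)
threading.Thread(target=srv.serve, daemon=True).start()

client = flight.FlightClient(location)
client.wait_for_available(timeout=5)       # block until the server is up

table = pa.table({"x": [1, 2, 3]})
descriptor = flight.FlightDescriptor.for_path("example")
writer, _ = client.do_put(descriptor, table.schema)   # upload the table
writer.write_table(table)
writer.close()

info = client.get_flight_info(descriptor)             # 3 records stored
reader = client.do_get(info.endpoints[0].ticket)      # download it again
assert reader.read_all().num_rows == table.num_rows
```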
diff --git a/python/examples/minimal_build/Dockerfile.fedora b/python/examples/minimal_build/Dockerfile.fedora
deleted file mode 100644
index 7dc3291..0000000
--- a/python/examples/minimal_build/Dockerfile.fedora
+++ /dev/null
@@ -1,31 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM fedora:31
-
-RUN dnf update -y && \
-    dnf install -y \
-        autoconf \
-        gcc \
-        gcc-c++ \
-        git \
-        wget \
-        make \
-        cmake \
-        ninja-build \
-        python3-devel \
-        python3-virtualenv
\ No newline at end of file
diff --git a/python/examples/minimal_build/Dockerfile.ubuntu b/python/examples/minimal_build/Dockerfile.ubuntu
deleted file mode 100644
index d7b8408..0000000
--- a/python/examples/minimal_build/Dockerfile.ubuntu
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM ubuntu:bionic
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update -y -q && \
-    apt-get install -y -q --no-install-recommends \
-        apt-transport-https \
-        software-properties-common \
-        wget && \
-    apt-get install -y -q --no-install-recommends \
-      build-essential \
-      cmake \
-      git \
-      ninja-build \
-      python3-dev \
-      python3-pip && \
-      apt-get clean && rm -rf /var/lib/apt/lists*
-
-RUN pip3 install wheel && \
-    pip3 install -U setuptools && \
-    pip3 install wheel virtualenv
\ No newline at end of file
diff --git a/python/examples/minimal_build/README.md b/python/examples/minimal_build/README.md
deleted file mode 100644
index 9803e18..0000000
--- a/python/examples/minimal_build/README.md
+++ /dev/null
@@ -1,73 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Minimal Python source build on Linux
-
-This directory shows how to bootstrap a local build from source on Linux with
-an eye toward maximum portability across different Linux distributions. This
-may help contributors debugging build issues caused by their local
-environments.
-
-## Fedora 31
-
-First, build the Docker image using:
-```
-docker build -t arrow_fedora_minimal -f Dockerfile.fedora .
-```
-
-Then build PyArrow with pip/virtualenv or conda, respectively:
-```
-# With pip/virtualenv
-docker run --rm -t -i -v $PWD:/io arrow_fedora_minimal /io/build_venv.sh
-
-# With conda
-docker run --rm -t -i -v $PWD:/io arrow_fedora_minimal /io/build_conda.sh
-```
-
-## Ubuntu 18.04
-
-First, build the Docker image using:
-```
-docker build -t arrow_ubuntu_minimal -f Dockerfile.ubuntu .
-```
-
-Then build PyArrow with pip/virtualenv or conda, respectively:
-```
-# With pip/virtualenv
-docker run --rm -t -i -v $PWD:/io arrow_ubuntu_minimal /io/build_venv.sh
-
-# With conda
-docker run --rm -t -i -v $PWD:/io arrow_ubuntu_minimal /io/build_conda.sh
-```
-
-## Building on Fedora - Podman and SELinux
-
-In addition to using Podman instead of Docker, you need to specify `:Z`
-for SELinux relabelling when binding a volume.
-
-First, build the image using:
-```
-podman build -t arrow_fedora_minimal -f Dockerfile.fedora
-```
-
-Then build PyArrow with pip/virtualenv:
-```
-# With pip/virtualenv
-podman run --rm -i -v $PWD:/io:Z -t arrow_fedora_minimal /io/build_venv.sh
-```
diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh
deleted file mode 100755
index 6f93ebd..0000000
--- a/python/examples/minimal_build/build_conda.sh
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-
-#----------------------------------------------------------------------
-# Change this to whatever makes sense for your system
-
-HOME=
-MINICONDA=$HOME/miniconda-for-arrow
-LIBRARY_INSTALL_DIR=$HOME/local-libs
-CPP_BUILD_DIR=$HOME/arrow-cpp-build
-ARROW_ROOT=/arrow
-PYTHON=3.7
-
-git clone https://github.com/apache/arrow.git /arrow
-
-#----------------------------------------------------------------------
-# Run these only once
-
-function setup_miniconda() {
-  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
-  wget -O miniconda.sh $MINICONDA_URL
-  bash miniconda.sh -b -p $MINICONDA
-  rm -f miniconda.sh
-  LOCAL_PATH=$PATH
-  export PATH="$MINICONDA/bin:$PATH"
-
-  conda update -y -q conda
-  conda config --set auto_update_conda false
-  conda info -a
-
-  conda config --set show_channel_urls True
-  conda config --add channels https://repo.continuum.io/pkgs/free
-  conda config --add channels conda-forge
-
-  conda create -y -n pyarrow-$PYTHON -c conda-forge \
-        --file arrow/ci/conda_env_unix.yml \
-        --file arrow/ci/conda_env_cpp.yml \
-        --file arrow/ci/conda_env_python.yml \
-        compilers \
-        python=$PYTHON \
-        pandas
-
-  export PATH=$LOCAL_PATH
-}
-
-setup_miniconda
-
-#----------------------------------------------------------------------
-# Activate conda in bash and activate conda environment
-
-. $MINICONDA/etc/profile.d/conda.sh
-conda activate pyarrow-$PYTHON
-export ARROW_HOME=$CONDA_PREFIX
-
-#----------------------------------------------------------------------
-# Build C++ library
-
-mkdir -p $CPP_BUILD_DIR
-pushd $CPP_BUILD_DIR
-
-cmake -GNinja \
-      -DCMAKE_BUILD_TYPE=DEBUG \
-      -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-      -DCMAKE_INSTALL_LIBDIR=lib \
-      -DARROW_FLIGHT=ON \
-      -DARROW_WITH_BZ2=ON \
-      -DARROW_WITH_ZLIB=ON \
-      -DARROW_WITH_ZSTD=ON \
-      -DARROW_WITH_LZ4=ON \
-      -DARROW_WITH_SNAPPY=ON \
-      -DARROW_WITH_BROTLI=ON \
-      -DARROW_PARQUET=ON \
-      -DARROW_PLASMA=ON \
-      -DARROW_PYTHON=ON \
-      $ARROW_ROOT/cpp
-
-ninja install
-
-popd
-
-#----------------------------------------------------------------------
-# Build and test Python library
-pushd $ARROW_ROOT/python
-
-rm -rf build/  # remove any pesky pre-existing build directory
-
-export PYARROW_BUILD_TYPE=Debug
-export PYARROW_CMAKE_GENERATOR=Ninja
-export PYARROW_WITH_FLIGHT=1
-export PYARROW_WITH_PARQUET=1
-
-# You can run either "develop" or "build_ext --inplace". Your pick
-
-# python setup.py build_ext --inplace
-python setup.py develop
-
-# git submodules are required for unit tests
-git submodule update --init
-export PARQUET_TEST_DATA="$ARROW_ROOT/cpp/submodules/parquet-testing/data"
-export ARROW_TEST_DATA="$ARROW_ROOT/testing/data"
-
-py.test pyarrow
diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh
deleted file mode 100755
index afa4206..0000000
--- a/python/examples/minimal_build/build_venv.sh
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-
-#----------------------------------------------------------------------
-# Change this to whatever makes sense for your system
-
-WORKDIR=${WORKDIR:-$HOME}
-MINICONDA=$WORKDIR/miniconda-for-arrow
-LIBRARY_INSTALL_DIR=$WORKDIR/local-libs
-CPP_BUILD_DIR=$WORKDIR/arrow-cpp-build
-ARROW_ROOT=$WORKDIR/arrow
-export ARROW_HOME=$WORKDIR/dist
-export LD_LIBRARY_PATH=$ARROW_HOME/lib:$LD_LIBRARY_PATH
-
-virtualenv $WORKDIR/venv
-source $WORKDIR/venv/bin/activate
-
-git clone https://github.com/apache/arrow.git $ARROW_ROOT
-
-pip install -r $ARROW_ROOT/python/requirements-build.txt \
-     -r $ARROW_ROOT/python/requirements-test.txt
-
-#----------------------------------------------------------------------
-# Build C++ library
-
-mkdir -p $CPP_BUILD_DIR
-pushd $CPP_BUILD_DIR
-
-cmake -GNinja \
-      -DCMAKE_BUILD_TYPE=DEBUG \
-      -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-      -DCMAKE_INSTALL_LIBDIR=lib \
-      -DARROW_WITH_BZ2=ON \
-      -DARROW_WITH_ZLIB=ON \
-      -DARROW_WITH_ZSTD=ON \
-      -DARROW_WITH_LZ4=ON \
-      -DARROW_WITH_SNAPPY=ON \
-      -DARROW_WITH_BROTLI=ON \
-      -DARROW_PARQUET=ON \
-      -DARROW_PYTHON=ON \
-      $ARROW_ROOT/cpp
-
-ninja install
-
-popd
-
-#----------------------------------------------------------------------
-# Build and test Python library
-pushd $ARROW_ROOT/python
-
-rm -rf build/  # remove any pesky pre-existing build directory
-
-export PYARROW_BUILD_TYPE=Debug
-export PYARROW_CMAKE_GENERATOR=Ninja
-export PYARROW_WITH_PARQUET=1
-
-# You can run either "develop" or "build_ext --inplace". Your pick
-
-# python setup.py build_ext --inplace
-python setup.py develop
-
-# git submodules are required for unit tests
-git submodule update --init
-export PARQUET_TEST_DATA="$ARROW_ROOT/cpp/submodules/parquet-testing/data"
-export ARROW_TEST_DATA="$ARROW_ROOT/testing/data"
-
-py.test pyarrow
diff --git a/python/examples/plasma/sorting/multimerge.pyx b/python/examples/plasma/sorting/multimerge.pyx
deleted file mode 100644
index 5e77fdf..0000000
--- a/python/examples/plasma/sorting/multimerge.pyx
+++ /dev/null
@@ -1,102 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from libc.stdint cimport uintptr_t
-from libcpp.vector cimport vector
-from libcpp.pair cimport pair
-
-import numpy as np
-
-cimport numpy as np
-
-cdef extern from "<queue>" namespace "std" nogil:
-    cdef cppclass priority_queue[T]:
-        priority_queue() except +
-        priority_queue(priority_queue&) except +
-        bint empty()
-        void pop()
-        void push(T&)
-        size_t size()
-        T& top()
-
-
-def multimerge2d(*arrays):
-    """Merge a list of sorted 2d arrays into a sorted 2d array.
-
-    This assumes C style ordering for both input and output arrays. For
-    each input array we have array[i,0] <= array[i+1,0] and for the output
-    array the same will hold.
-
-    Ideally this code would be simpler and also support both C style
-    and Fortran style ordering.
-    """
-    cdef int num_arrays = len(arrays)
-    assert num_arrays > 0
-
-    cdef int num_cols = arrays[0].shape[1]
-
-    for i in range(num_arrays):
-        assert arrays[i].ndim == 2
-        assert arrays[i].dtype == np.float64
-        assert arrays[i].shape[1] == num_cols
-        assert not np.isfortran(arrays[i])
-
-    cdef vector[double*] data
-
-    # The indices vector keeps track of the flat element offset of the next
-    # row to process in each array.
-    cdef vector[int] indices = num_arrays * [0]
-
-    # The sizes vector stores the total number of elements that each array has.
-    cdef vector[int] sizes
-
-    cdef priority_queue[pair[double, int]] queue
-    cdef pair[double, int] top
-    cdef int num_rows = sum([array.shape[0] for array in arrays])
-    cdef np.ndarray[np.float64_t, ndim=2] result = np.zeros(
-        (num_rows, num_cols), dtype=np.float64)
-    cdef double* result_ptr = <double*> np.PyArray_DATA(result)
-    for i in range(num_arrays):
-        if arrays[i].size > 0:
-            sizes.push_back(arrays[i].size)
-            data.push_back(<double*> np.PyArray_DATA(arrays[i]))
-            queue.push(pair[double, int](-data[i][0], i))
-
-    cdef int curr_idx = 0
-    cdef int j
-    cdef int col = 0
-
-    for j in range(num_rows):
-        top = queue.top()
-        for col in range(num_cols):
-            result_ptr[curr_idx + col] = (
-                data[top.second][indices[top.second] + col])
-
-        indices[top.second] += num_cols
-        curr_idx += num_cols
-
-        queue.pop()
-        if indices[top.second] < sizes[top.second]:
-            queue.push(
-                pair[double, int](-data[top.second][indices[top.second]],
-                                  top.second))
-
-    return result
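
For reference, a minimal usage sketch of the `multimerge2d` helper removed above, assuming the extension has been compiled (e.g. with the `setup.py` deleted just below) and is importable as `multimerge`:

```python
import numpy as np
import multimerge  # hypothetical local build of the removed multimerge.pyx

# Two row-sorted, C-ordered float64 arrays with the same number of columns.
a = np.array([[1.0, 10.0], [3.0, 30.0]])
b = np.array([[2.0, 20.0], [4.0, 40.0]])

merged = multimerge.multimerge2d(a, b)
# merged is globally sorted by its first column:
# [[ 1. 10.]
#  [ 2. 20.]
#  [ 3. 30.]
#  [ 4. 40.]]
```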
diff --git a/python/examples/plasma/sorting/setup.py b/python/examples/plasma/sorting/setup.py
deleted file mode 100644
index a5dfa5a..0000000
--- a/python/examples/plasma/sorting/setup.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import numpy as np
-from setuptools import setup
-from Cython.Build import cythonize
-
-setup(
-    name="multimerge",
-    extra_compile_args=["-O3", "-mtune=native", "-march=native"],
-    ext_modules=cythonize("multimerge.pyx"),
-    include_dirs=[np.get_include()],
-)
diff --git a/python/examples/plasma/sorting/sort_df.py b/python/examples/plasma/sorting/sort_df.py
deleted file mode 100644
index 2a51759..0000000
--- a/python/examples/plasma/sorting/sort_df.py
+++ /dev/null
@@ -1,203 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from multiprocessing import Pool
-import numpy as np
-import pandas as pd
-import pyarrow as pa
-import pyarrow.plasma as plasma
-import subprocess
-import time
-
-import multimerge
-
-# To run this example, you will first need to run "python setup.py install" in
-# this directory to build the Cython module.
-#
-# You will only see speedups if you run this code on more data; this is just a
-# small example that can run on a laptop.
-#
-# The values we used to get a speedup (on a m4.10xlarge instance on EC2) were
-#     object_store_size = 84 * 10 ** 9
-#     num_cores = 20
-#     num_rows = 10 ** 9
-#     num_cols = 1
-
-client = None
-object_store_size = 2 * 10 ** 9  # 2 GB
-num_cores = 8
-num_rows = 200000
-num_cols = 2
-column_names = [str(i) for i in range(num_cols)]
-column_to_sort = column_names[0]
-
-
-# Connect to clients
-def connect():
-    global client
-    client = plasma.connect('/tmp/store')
-    np.random.seed(int(time.time() * 10e7) % 10000000)
-
-
-def put_df(df):
-    record_batch = pa.RecordBatch.from_pandas(df)
-
-    # Get size of record batch and schema
-    mock_sink = pa.MockOutputStream()
-    stream_writer = pa.RecordBatchStreamWriter(mock_sink, record_batch.schema)
-    stream_writer.write_batch(record_batch)
-    data_size = mock_sink.size()
-
-    # Generate an ID and allocate a buffer in the object store for the
-    # serialized DataFrame
-    object_id = plasma.ObjectID(np.random.bytes(20))
-    buf = client.create(object_id, data_size)
-
-    # Write the serialized DataFrame to the object store
-    sink = pa.FixedSizeBufferWriter(buf)
-    stream_writer = pa.RecordBatchStreamWriter(sink, record_batch.schema)
-    stream_writer.write_batch(record_batch)
-
-    # Seal the object
-    client.seal(object_id)
-
-    return object_id
-
-
-def get_dfs(object_ids):
-    """Retrieve dataframes from the object store given their object IDs."""
-    buffers = client.get_buffers(object_ids)
-    return [pa.RecordBatchStreamReader(buf).read_next_batch().to_pandas()
-            for buf in buffers]
-
-
-def local_sort(object_id):
-    """Sort a partition of a dataframe."""
-    # Get the dataframe from the object store.
-    [df] = get_dfs([object_id])
-    # Sort the dataframe.
-    sorted_df = df.sort_values(by=column_to_sort)
-    # Get evenly spaced values from the dataframe.
-    indices = np.linspace(0, len(df) - 1, num=num_cores, dtype=np.int64)
-    # Put the sorted dataframe in the object store and return the corresponding
-    # object ID as well as the sampled values.
-    return put_df(sorted_df), sorted_df.to_numpy().take(indices)
-
-
-def local_partitions(object_id_and_pivots):
-    """Take a sorted partition of a dataframe and split it into more pieces."""
-    object_id, pivots = object_id_and_pivots
-    [df] = get_dfs([object_id])
-    split_at = df[column_to_sort].searchsorted(pivots)
-    split_at = [0] + list(split_at) + [len(df)]
-    # Partition the sorted dataframe and put each partition into the object
-    # store.
-    return [put_df(df[i:j]) for i, j in zip(split_at[:-1], split_at[1:])]
-
-
-def merge(object_ids):
-    """Merge a number of sorted dataframes into a single sorted dataframe."""
-    dfs = get_dfs(object_ids)
-
-    # In order to use our multimerge code, we have to convert the arrays from
-    # the Fortran format to the C format.
-    arrays = [np.ascontiguousarray(df.to_numpy()) for df in dfs]
-    for a in arrays:
-        assert a.dtype == np.float64
-        assert not np.isfortran(a)
-
-    # Filter out empty arrays.
-    arrays = [a for a in arrays if a.shape[0] > 0]
-
-    if len(arrays) == 0:
-        return None
-
-    resulting_array = multimerge.multimerge2d(*arrays)
-    merged_df2 = pd.DataFrame(resulting_array, columns=column_names)
-
-    return put_df(merged_df2)
-
-
-if __name__ == '__main__':
-    # Start the plasma store.
-    p = subprocess.Popen(['plasma_store',
-                          '-s', '/tmp/store',
-                          '-m', str(object_store_size)])
-
-    # Connect to the plasma store.
-    connect()
-
-    # Connect the processes in the pool.
-    pool = Pool(initializer=connect, initargs=(), processes=num_cores)
-
-    # Create a DataFrame from a numpy array.
-    df = pd.DataFrame(np.random.randn(num_rows, num_cols),
-                      columns=column_names)
-
-    partition_ids = [put_df(partition) for partition
-                     in np.split(df, num_cores)]
-
-    # Begin timing the parallel sort example.
-    parallel_sort_start = time.time()
-
-    # Sort each partition and subsample them. The subsampled values will be
-    # used to create buckets.
-    sorted_df_ids, pivot_groups = list(zip(*pool.map(local_sort,
-                                                     partition_ids)))
-
-    # Choose the pivots.
-    all_pivots = np.concatenate(pivot_groups)
-    indices = np.linspace(0, len(all_pivots) - 1, num=num_cores,
-                          dtype=np.int64)
-    pivots = np.take(np.sort(all_pivots), indices)
-
-    # Break all of the sorted partitions into even smaller partitions. Group
-    # the object IDs from each bucket together.
-    results = list(zip(*pool.map(local_partitions,
-                                 zip(sorted_df_ids,
-                                     len(sorted_df_ids) * [pivots]))))
-
-    # Merge each of the buckets and store the results in the object store.
-    object_ids = pool.map(merge, results)
-
-    resulting_ids = [object_id for object_id in object_ids
-                     if object_id is not None]
-
-    # Stop timing the parallel sort example.
-    parallel_sort_end = time.time()
-
-    print('Parallel sort took {} seconds.'
-          .format(parallel_sort_end - parallel_sort_start))
-
-    serial_sort_start = time.time()
-
-    original_sorted_df = df.sort_values(by=column_to_sort)
-
-    serial_sort_end = time.time()
-
-    # Check that we sorted the DataFrame properly.
-
-    sorted_dfs = get_dfs(resulting_ids)
-    sorted_df = pd.concat(sorted_dfs)
-
-    print('Serial sort took {} seconds.'
-          .format(serial_sort_end - serial_sort_start))
-
-    assert np.allclose(sorted_df.values, original_sorted_df.values)
-
-    # Kill the object store.
-    p.kill()
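
The script above revolves around one pattern: serializing a pandas DataFrame into a pre-allocated Plasma buffer and reading it back without copying. A condensed sketch of that round trip, assuming a `plasma_store` is already listening on `/tmp/store` (it mirrors the `put_df`/`get_dfs` helpers deleted above):

```python
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.plasma as plasma

client = plasma.connect('/tmp/store')
df = pd.DataFrame({'x': [3.0, 1.0, 2.0]})
batch = pa.RecordBatch.from_pandas(df)

# Measure the serialized size with a mock sink...
mock_sink = pa.MockOutputStream()
writer = pa.RecordBatchStreamWriter(mock_sink, batch.schema)
writer.write_batch(batch)

# ...then allocate exactly that much shared memory in the object store.
object_id = plasma.ObjectID(np.random.bytes(20))
buf = client.create(object_id, mock_sink.size())

# Write the record batch stream into the buffer and seal it so that
# other processes can read it.
writer = pa.RecordBatchStreamWriter(pa.FixedSizeBufferWriter(buf),
                                    batch.schema)
writer.write_batch(batch)
client.seal(object_id)

# Any connected process can now rebuild the DataFrame without copying.
[rbuf] = client.get_buffers([object_id])
df_back = pa.RecordBatchStreamReader(rbuf).read_next_batch().to_pandas()
```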
diff --git a/python/pyarrow/__init__.pxd b/python/pyarrow/__init__.pxd
deleted file mode 100644
index 8cc54b4..0000000
--- a/python/pyarrow/__init__.pxd
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from libcpp.memory cimport shared_ptr
-from pyarrow.includes.libarrow cimport (CArray, CBuffer, CDataType,
-                                        CField, CRecordBatch, CSchema,
-                                        CTable, CTensor, CSparseCOOTensor,
-                                        CSparseCSRMatrix, CSparseCSCMatrix,
-                                        CSparseCSFTensor)
-
-cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
-    cdef int import_pyarrow() except -1
-    cdef object wrap_buffer(const shared_ptr[CBuffer]& buffer)
-    cdef object wrap_data_type(const shared_ptr[CDataType]& type)
-    cdef object wrap_field(const shared_ptr[CField]& field)
-    cdef object wrap_schema(const shared_ptr[CSchema]& schema)
-    cdef object wrap_array(const shared_ptr[CArray]& sp_array)
-    cdef object wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
-    cdef object wrap_sparse_tensor_coo(
-        const shared_ptr[CSparseCOOTensor]& sp_sparse_tensor)
-    cdef object wrap_sparse_tensor_csr(
-        const shared_ptr[CSparseCSRMatrix]& sp_sparse_tensor)
-    cdef object wrap_sparse_tensor_csc(
-        const shared_ptr[CSparseCSCMatrix]& sp_sparse_tensor)
-    cdef object wrap_sparse_tensor_csf(
-        const shared_ptr[CSparseCSFTensor]& sp_sparse_tensor)
-    cdef object wrap_table(const shared_ptr[CTable]& ctable)
-    cdef object wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
deleted file mode 100644
index adfd69c..0000000
--- a/python/pyarrow/__init__.py
+++ /dev/null
@@ -1,504 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# flake8: noqa
-
-"""
-PyArrow is the Python implementation of Apache Arrow.
-
-Apache Arrow is a cross-language development platform for in-memory data.
-It specifies a standardized language-independent columnar memory format for
-flat and hierarchical data, organized for efficient analytic operations on
-modern hardware. It also provides computational libraries and zero-copy
-streaming messaging and interprocess communication.
-
-For more information see the official page at https://arrow.apache.org
-"""
-
-import gc as _gc
-import os as _os
-import sys as _sys
-import warnings as _warnings
-
-try:
-    from ._generated_version import version as __version__
-except ImportError:
-    # Package is not installed, parse git tag at runtime
-    try:
-        import setuptools_scm
-        # Code duplicated from setup.py to avoid a dependency on each other
-
-        def parse_git(root, **kwargs):
-            """
-            Parse function for setuptools_scm that ignores tags for non-C++
-            subprojects, e.g. apache-arrow-js-XXX tags.
-            """
-            from setuptools_scm.git import parse
-            kwargs['describe_command'] = \
-                "git describe --dirty --tags --long --match 'apache-arrow-[0-9].*'"
-            return parse(root, **kwargs)
-        __version__ = setuptools_scm.get_version('../',
-                                                 parse=parse_git)
-    except ImportError:
-        __version__ = None
-
-# ARROW-8684: Disable GC while initializing Cython extension module,
-# to workaround Cython bug in https://github.com/cython/cython/issues/3603
-_gc_enabled = _gc.isenabled()
-_gc.disable()
-import pyarrow.lib as _lib
-if _gc_enabled:
-    _gc.enable()
-
-from pyarrow.lib import (BuildInfo, RuntimeInfo, VersionInfo,
-                         cpp_build_info, cpp_version, cpp_version_info,
-                         runtime_info, cpu_count, set_cpu_count,
-                         enable_signal_handlers)
-
-
-def show_versions():
-    """
-    Print various version information, to help with error reporting.
-    """
-    # TODO: CPU information and flags
-    print("pyarrow version info\n--------------------")
-    print("Package kind: {}".format(cpp_build_info.package_kind
-                                    if len(cpp_build_info.package_kind) > 0
-                                    else "not indicated"))
-    print("Arrow C++ library version: {0}".format(cpp_build_info.version))
-    print("Arrow C++ compiler: {0} {1}"
-          .format(cpp_build_info.compiler_id, cpp_build_info.compiler_version))
-    print("Arrow C++ compiler flags: {0}"
-          .format(cpp_build_info.compiler_flags))
-    print("Arrow C++ git revision: {0}".format(cpp_build_info.git_id))
-    print("Arrow C++ git description: {0}"
-          .format(cpp_build_info.git_description))
-
-
-from pyarrow.lib import (null, bool_,
-                         int8, int16, int32, int64,
-                         uint8, uint16, uint32, uint64,
-                         time32, time64, timestamp, date32, date64, duration,
-                         float16, float32, float64,
-                         binary, string, utf8,
-                         large_binary, large_string, large_utf8,
-                         decimal128, decimal256,
-                         list_, large_list, map_, struct,
-                         union, sparse_union, dense_union,
-                         dictionary,
-                         field,
-                         type_for_alias,
-                         DataType, DictionaryType, StructType,
-                         ListType, LargeListType, MapType, FixedSizeListType,
-                         UnionType, SparseUnionType, DenseUnionType,
-                         TimestampType, Time32Type, Time64Type, DurationType,
-                         FixedSizeBinaryType, Decimal128Type, Decimal256Type,
-                         BaseExtensionType, ExtensionType,
-                         PyExtensionType, UnknownExtensionType,
-                         register_extension_type, unregister_extension_type,
-                         DictionaryMemo,
-                         KeyValueMetadata,
-                         Field,
-                         Schema,
-                         schema,
-                         unify_schemas,
-                         Array, Tensor,
-                         array, chunked_array, record_batch, nulls, repeat,
-                         SparseCOOTensor, SparseCSRMatrix, SparseCSCMatrix,
-                         SparseCSFTensor,
-                         infer_type, from_numpy_dtype,
-                         NullArray,
-                         NumericArray, IntegerArray, FloatingPointArray,
-                         BooleanArray,
-                         Int8Array, UInt8Array,
-                         Int16Array, UInt16Array,
-                         Int32Array, UInt32Array,
-                         Int64Array, UInt64Array,
-                         ListArray, LargeListArray, MapArray,
-                         FixedSizeListArray, UnionArray,
-                         BinaryArray, StringArray,
-                         LargeBinaryArray, LargeStringArray,
-                         FixedSizeBinaryArray,
-                         DictionaryArray,
-                         Date32Array, Date64Array, TimestampArray,
-                         Time32Array, Time64Array, DurationArray,
-                         Decimal128Array, Decimal256Array, StructArray, ExtensionArray,
-                         scalar, NA, _NULL as NULL, Scalar,
-                         NullScalar, BooleanScalar,
-                         Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
-                         UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar,
-                         HalfFloatScalar, FloatScalar, DoubleScalar,
-                         Decimal128Scalar, Decimal256Scalar,
-                         ListScalar, LargeListScalar, FixedSizeListScalar,
-                         Date32Scalar, Date64Scalar,
-                         Time32Scalar, Time64Scalar,
-                         BinaryScalar, LargeBinaryScalar,
-                         StringScalar, LargeStringScalar,
-                         FixedSizeBinaryScalar, DictionaryScalar,
-                         MapScalar, UnionScalar, StructScalar,
-                         TimestampScalar, DurationScalar)
-
-# Buffers, allocation
-from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
-                         Codec, compress, decompress, allocate_buffer)
-
-from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
-                         total_allocated_bytes, set_memory_pool,
-                         default_memory_pool, system_memory_pool,
-                         jemalloc_memory_pool, mimalloc_memory_pool,
-                         logging_memory_pool, proxy_memory_pool,
-                         log_memory_allocations, jemalloc_set_decay_ms)
-
-# I/O
-from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
-                         BufferedInputStream, BufferedOutputStream,
-                         CompressedInputStream, CompressedOutputStream,
-                         TransformInputStream, transcoding_input_stream,
-                         FixedSizeBufferWriter,
-                         BufferReader, BufferOutputStream,
-                         OSFile, MemoryMappedFile, memory_map,
-                         create_memory_map, have_libhdfs,
-                         MockOutputStream, input_stream, output_stream)
-
-from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
-                         concat_arrays, concat_tables)
-
-# Exceptions
-from pyarrow.lib import (ArrowCancelled,
-                         ArrowCapacityError,
-                         ArrowException,
-                         ArrowKeyError,
-                         ArrowIndexError,
-                         ArrowInvalid,
-                         ArrowIOError,
-                         ArrowMemoryError,
-                         ArrowNotImplementedError,
-                         ArrowTypeError,
-                         ArrowSerializationError)
-
-# Serialization
-from pyarrow.lib import (deserialize_from, deserialize,
-                         deserialize_components,
-                         serialize, serialize_to, read_serialized,
-                         SerializationCallbackError,
-                         DeserializationCallbackError)
-
-import pyarrow.hdfs as hdfs
-
-from pyarrow.ipc import serialize_pandas, deserialize_pandas
-import pyarrow.ipc as ipc
-
-from pyarrow.serialization import (default_serialization_context,
-                                   register_default_serialization_handlers,
-                                   register_torch_serialization_handlers)
-
-import pyarrow.types as types
-
-
-# deprecated top-level access
-
-
-from pyarrow.filesystem import FileSystem as _FileSystem
-from pyarrow.filesystem import LocalFileSystem as _LocalFileSystem
-from pyarrow.hdfs import HadoopFileSystem as _HadoopFileSystem
-
-from pyarrow.lib import SerializationContext as _SerializationContext
-from pyarrow.lib import SerializedPyObject as _SerializedPyObject
-
-
-_localfs = _LocalFileSystem._get_instance()
-
-
-_msg = (
-    "pyarrow.{0} is deprecated as of 2.0.0, please use pyarrow.fs.{1} instead."
-)
-
-_serialization_msg = (
-    "'pyarrow.{0}' is deprecated and will be removed in a future version. "
-    "Use pickle or the pyarrow IPC functionality instead."
-)
-
-_deprecated = {
-    "localfs": (_localfs, "LocalFileSystem"),
-    "FileSystem": (_FileSystem, "FileSystem"),
-    "LocalFileSystem": (_LocalFileSystem, "LocalFileSystem"),
-    "HadoopFileSystem": (_HadoopFileSystem, "HadoopFileSystem"),
-}
-
-_serialization_deprecated = {
-    "SerializationContext": _SerializationContext,
-    "SerializedPyObject": _SerializedPyObject,
-}
-
-if _sys.version_info >= (3, 7):
-    def __getattr__(name):
-        if name in _deprecated:
-            obj, new_name = _deprecated[name]
-            _warnings.warn(_msg.format(name, new_name),
-                           FutureWarning, stacklevel=2)
-            return obj
-        elif name in _serialization_deprecated:
-            _warnings.warn(_serialization_msg.format(name),
-                           FutureWarning, stacklevel=2)
-            return _serialization_deprecated[name]
-
-        raise AttributeError(
-            "module 'pyarrow' has no attribute '{0}'".format(name)
-        )
-else:
-    localfs = _localfs
-    FileSystem = _FileSystem
-    LocalFileSystem = _LocalFileSystem
-    HadoopFileSystem = _HadoopFileSystem
-    SerializationContext = _SerializationContext
-    SerializedPyObject = _SerializedPyObject
-
-
-# Entry point for starting the plasma store
-
-
-def _plasma_store_entry_point():
-    """Entry point for starting the plasma store.
-
-    This can be used by invoking e.g.
-    ``plasma_store -s /tmp/plasma -m 1000000000``
-    from the command line and will start the plasma_store executable with the
-    given arguments.
-    """
-    import pyarrow
-    plasma_store_executable = _os.path.join(pyarrow.__path__[0],
-                                            "plasma-store-server")
-    _os.execv(plasma_store_executable, _sys.argv)
-
-
-# ----------------------------------------------------------------------
-# Deprecations
-
-from pyarrow.util import _deprecate_api, _deprecate_class
-
-read_message = _deprecate_api("read_message", "ipc.read_message",
-                              ipc.read_message, "0.17.0")
-
-read_record_batch = _deprecate_api("read_record_batch",
-                                   "ipc.read_record_batch",
-                                   ipc.read_record_batch, "0.17.0")
-
-read_schema = _deprecate_api("read_schema", "ipc.read_schema",
-                             ipc.read_schema, "0.17.0")
-
-read_tensor = _deprecate_api("read_tensor", "ipc.read_tensor",
-                             ipc.read_tensor, "0.17.0")
-
-write_tensor = _deprecate_api("write_tensor", "ipc.write_tensor",
-                              ipc.write_tensor, "0.17.0")
-
-get_record_batch_size = _deprecate_api("get_record_batch_size",
-                                       "ipc.get_record_batch_size",
-                                       ipc.get_record_batch_size, "0.17.0")
-
-get_tensor_size = _deprecate_api("get_tensor_size",
-                                 "ipc.get_tensor_size",
-                                 ipc.get_tensor_size, "0.17.0")
-
-open_stream = _deprecate_api("open_stream", "ipc.open_stream",
-                             ipc.open_stream, "0.17.0")
-
-open_file = _deprecate_api("open_file", "ipc.open_file", ipc.open_file,
-                           "0.17.0")
-
-
-def _deprecate_scalar(ty, symbol):
-    return _deprecate_class("{}Value".format(ty), symbol, "1.0.0")
-
-
-ArrayValue = _deprecate_class("ArrayValue", Scalar, "1.0.0")
-NullType = _deprecate_class("NullType", NullScalar, "1.0.0")
-
-BooleanValue = _deprecate_scalar("Boolean", BooleanScalar)
-Int8Value = _deprecate_scalar("Int8", Int8Scalar)
-Int16Value = _deprecate_scalar("Int16", Int16Scalar)
-Int32Value = _deprecate_scalar("Int32", Int32Scalar)
-Int64Value = _deprecate_scalar("Int64", Int64Scalar)
-UInt8Value = _deprecate_scalar("UInt8", UInt8Scalar)
-UInt16Value = _deprecate_scalar("UInt16", UInt16Scalar)
-UInt32Value = _deprecate_scalar("UInt32", UInt32Scalar)
-UInt64Value = _deprecate_scalar("UInt64", UInt64Scalar)
-HalfFloatValue = _deprecate_scalar("HalfFloat", HalfFloatScalar)
-FloatValue = _deprecate_scalar("Float", FloatScalar)
-DoubleValue = _deprecate_scalar("Double", DoubleScalar)
-ListValue = _deprecate_scalar("List", ListScalar)
-LargeListValue = _deprecate_scalar("LargeList", LargeListScalar)
-MapValue = _deprecate_scalar("Map", MapScalar)
-FixedSizeListValue = _deprecate_scalar("FixedSizeList", FixedSizeListScalar)
-BinaryValue = _deprecate_scalar("Binary", BinaryScalar)
-StringValue = _deprecate_scalar("String", StringScalar)
-LargeBinaryValue = _deprecate_scalar("LargeBinary", LargeBinaryScalar)
-LargeStringValue = _deprecate_scalar("LargeString", LargeStringScalar)
-FixedSizeBinaryValue = _deprecate_scalar("FixedSizeBinary",
-                                         FixedSizeBinaryScalar)
-Decimal128Value = _deprecate_scalar("Decimal128", Decimal128Scalar)
-Decimal256Value = _deprecate_scalar("Decimal256", Decimal256Scalar)
-UnionValue = _deprecate_scalar("Union", UnionScalar)
-StructValue = _deprecate_scalar("Struct", StructScalar)
-DictionaryValue = _deprecate_scalar("Dictionary", DictionaryScalar)
-Date32Value = _deprecate_scalar("Date32", Date32Scalar)
-Date64Value = _deprecate_scalar("Date64", Date64Scalar)
-Time32Value = _deprecate_scalar("Time32", Time32Scalar)
-Time64Value = _deprecate_scalar("Time64", Time64Scalar)
-TimestampValue = _deprecate_scalar("Timestamp", TimestampScalar)
-DurationValue = _deprecate_scalar("Duration", DurationScalar)
-
-
-# TODO: Deprecate these somehow in the pyarrow namespace
-from pyarrow.ipc import (Message, MessageReader, MetadataVersion,
-                         RecordBatchFileReader, RecordBatchFileWriter,
-                         RecordBatchStreamReader, RecordBatchStreamWriter)
-
-# ----------------------------------------------------------------------
-# Returning absolute path to the pyarrow include directory (if bundled, e.g. in
-# wheels)
-
-
-def get_include():
-    """
-    Return absolute path to directory containing Arrow C++ include
-    headers. Similar to numpy.get_include
-    """
-    return _os.path.join(_os.path.dirname(__file__), 'include')
-
-
-def _get_pkg_config_executable():
-    return _os.environ.get('PKG_CONFIG', 'pkg-config')
-
-
-def _has_pkg_config(pkgname):
-    import subprocess
-    try:
-        return subprocess.call([_get_pkg_config_executable(),
-                                '--exists', pkgname]) == 0
-    except FileNotFoundError:
-        return False
-
-
-def _read_pkg_config_variable(pkgname, cli_args):
-    import subprocess
-    cmd = [_get_pkg_config_executable(), pkgname] + cli_args
-    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
-                            stderr=subprocess.PIPE)
-    out, err = proc.communicate()
-    if proc.returncode != 0:
-        raise RuntimeError("pkg-config failed: " + err.decode('utf8'))
-    return out.rstrip().decode('utf8')
-
-
-def get_libraries():
-    """
-    Return list of library names to include in the `libraries` argument for C
-    or Cython extensions using pyarrow
-    """
-    return ['arrow', 'arrow_python']
-
-
-def create_library_symlinks():
-    """
-    With Linux and macOS wheels, the bundled shared libraries have an embedded
-    ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them
-    with -larrow won't work unless we create symlinks at locations like
-    site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses
-    prior problems we had with shipping two copies of the shared libraries to
-    permit third party projects like turbodbc to build their C++ extensions
-    against the pyarrow wheels.
-
-    This function must only be invoked once and only when the shared libraries
-    are bundled with the Python package, which should only apply to wheel-based
-    installs. It requires write access to the site-packages/pyarrow directory
-    and so depending on your system may need to be run with root.
-    """
-    import glob
-    if _sys.platform == 'win32':
-        return
-    package_cwd = _os.path.dirname(__file__)
-
-    if _sys.platform == 'linux':
-        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))
-
-        def get_symlink_path(hard_path):
-            return hard_path.rsplit('.', 1)[0]
-    else:
-        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))
-
-        def get_symlink_path(hard_path):
-            return '.'.join((hard_path.rsplit('.', 2)[0], 'dylib'))
-
-    for lib_hard_path in bundled_libs:
-        symlink_path = get_symlink_path(lib_hard_path)
-        if _os.path.exists(symlink_path):
-            continue
-        try:
-            _os.symlink(lib_hard_path, symlink_path)
-        except PermissionError:
-            print("Tried creating symlink {}. If you need to link to "
-                  "bundled shared libraries, run "
-                  "pyarrow.create_library_symlinks() as root")
-
-
-def get_library_dirs():
-    """
-    Return a list of directories likely to contain Arrow C++ libraries for
-    linking C or Cython extensions using pyarrow
-    """
-    package_cwd = _os.path.dirname(__file__)
-    library_dirs = [package_cwd]
-
-    def append_library_dir(library_dir):
-        if library_dir not in library_dirs:
-            library_dirs.append(library_dir)
-
-    # Search library paths via pkg-config. This is necessary if the user
-    # installed libarrow and the other shared libraries manually and they
-    # are not shipped inside the pyarrow package (see also ARROW-2976).
-    pkg_config_executable = _os.environ.get('PKG_CONFIG') or 'pkg-config'
-    for pkgname in ["arrow", "arrow_python"]:
-        if _has_pkg_config(pkgname):
-            library_dir = _read_pkg_config_variable(pkgname,
-                                                    ["--libs-only-L"])
-            # pkg-config output could be empty if Arrow is installed
-            # as a system package.
-            if library_dir:
-                if not library_dir.startswith("-L"):
-                    raise ValueError(
-                        "pkg-config --libs-only-L returned unexpected "
-                        "value {!r}".format(library_dir))
-                append_library_dir(library_dir[2:])
-
-    if _sys.platform == 'win32':
-        # TODO(wesm): Is this necessary, or does setuptools within a conda
-        # installation add Library\lib to the linker path for MSVC?
-        python_base_install = _os.path.dirname(_sys.executable)
-        library_dir = _os.path.join(python_base_install, 'Library', 'lib')
-
-        if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
-            append_library_dir(library_dir)
-
-    # ARROW-4074: Allow for ARROW_HOME to be set to some other directory
-    if _os.environ.get('ARROW_HOME'):
-        append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
-    else:
-        # Python wheels bundle the Arrow libraries in the pyarrow directory.
-        append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))
-
-    return library_dirs
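
The `get_include`, `get_libraries` and `get_library_dirs` helpers deleted above exist so that third-party C++ or Cython extensions can compile and link against the Arrow libraries shipped with pyarrow. A minimal sketch of a downstream `setup.py` consuming them (the extension name and source file are hypothetical):

```python
import numpy as np
import pyarrow as pa
from setuptools import Extension, setup
from Cython.Build import cythonize

ext = Extension(
    "my_arrow_ext",                # hypothetical extension module
    sources=["my_arrow_ext.pyx"],  # hypothetical Cython source
    language="c++",
    include_dirs=[np.get_include(), pa.get_include()],
    libraries=pa.get_libraries(),        # ['arrow', 'arrow_python']
    library_dirs=pa.get_library_dirs(),
)

setup(name="my_arrow_ext", ext_modules=cythonize([ext]))
```

With wheel installs, `pa.create_library_symlinks()` may need to be run once beforehand so that `-larrow` and `-larrow_python` resolve against the bundled, ABI-versioned shared libraries.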
diff --git a/python/pyarrow/_compute.pxd b/python/pyarrow/_compute.pxd
deleted file mode 100644
index e187ed7..0000000
--- a/python/pyarrow/_compute.pxd
+++ /dev/null
@@ -1,27 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: language_level = 3
-
-from pyarrow.lib cimport *
-from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport *
-
-
-cdef class FunctionOptions(_Weakrefable):
-
-    cdef const CFunctionOptions* get_options(self) except NULL
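
The `_compute.pxd` declaration above and the `_compute.pyx` implementation removed below back the public `pyarrow.compute` module. A minimal sketch of that user-facing API, which the wrapped Function and Kernel classes ultimately serve (the printed values in comments are illustrative):

```python
import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([1, 2, 2, None, 3])

# Functions live in a global registry and can be looked up and inspected.
func = pc.get_function("sum")
print(func.kind, func.arity, func.num_kernels)  # e.g. scalar_aggregate 1 <n>

# They can be invoked via Function.call or the pc.* convenience wrappers.
print(func.call([arr]))  # 8 (nulls are skipped by default)
print(pc.sum(arr))       # 8
```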
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
deleted file mode 100644
index 1515bdc..0000000
--- a/python/pyarrow/_compute.pyx
+++ /dev/null
@@ -1,1092 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: language_level = 3
-
-from cython.operator cimport dereference as deref
-
-from collections import namedtuple
-
-from pyarrow.lib import frombytes, tobytes, ordered_dict
-from pyarrow.lib cimport *
-from pyarrow.includes.libarrow cimport *
-import pyarrow.lib as lib
-
-import numpy as np
-
-
-cdef wrap_scalar_function(const shared_ptr[CFunction]& sp_func):
-    """
-    Wrap a C++ scalar Function in a ScalarFunction object.
-    """
-    cdef ScalarFunction func = ScalarFunction.__new__(ScalarFunction)
-    func.init(sp_func)
-    return func
-
-
-cdef wrap_vector_function(const shared_ptr[CFunction]& sp_func):
-    """
-    Wrap a C++ vector Function in a VectorFunction object.
-    """
-    cdef VectorFunction func = VectorFunction.__new__(VectorFunction)
-    func.init(sp_func)
-    return func
-
-
-cdef wrap_scalar_aggregate_function(const shared_ptr[CFunction]& sp_func):
-    """
-    Wrap a C++ aggregate Function in a ScalarAggregateFunction object.
-    """
-    cdef ScalarAggregateFunction func = (
-        ScalarAggregateFunction.__new__(ScalarAggregateFunction)
-    )
-    func.init(sp_func)
-    return func
-
-
-cdef wrap_hash_aggregate_function(const shared_ptr[CFunction]& sp_func):
-    """
-    Wrap a C++ aggregate Function in a HashAggregateFunction object.
-    """
-    cdef HashAggregateFunction func = (
-        HashAggregateFunction.__new__(HashAggregateFunction)
-    )
-    func.init(sp_func)
-    return func
-
-
-cdef wrap_meta_function(const shared_ptr[CFunction]& sp_func):
-    """
-    Wrap a C++ meta Function in a MetaFunction object.
-    """
-    cdef MetaFunction func = (
-        MetaFunction.__new__(MetaFunction)
-    )
-    func.init(sp_func)
-    return func
-
-
-cdef wrap_function(const shared_ptr[CFunction]& sp_func):
-    """
-    Wrap a C++ Function in a Function object.
-
-    This dispatches to specialized wrappers depending on the function kind.
-    """
-    if sp_func.get() == NULL:
-        raise ValueError('Function was NULL')
-
-    cdef FunctionKind c_kind = sp_func.get().kind()
-    if c_kind == FunctionKind_SCALAR:
-        return wrap_scalar_function(sp_func)
-    elif c_kind == FunctionKind_VECTOR:
-        return wrap_vector_function(sp_func)
-    elif c_kind == FunctionKind_SCALAR_AGGREGATE:
-        return wrap_scalar_aggregate_function(sp_func)
-    elif c_kind == FunctionKind_HASH_AGGREGATE:
-        return wrap_hash_aggregate_function(sp_func)
-    elif c_kind == FunctionKind_META:
-        return wrap_meta_function(sp_func)
-    else:
-        raise NotImplementedError("Unknown Function::Kind")
-
-
-cdef wrap_scalar_kernel(const CScalarKernel* c_kernel):
-    if c_kernel == NULL:
-        raise ValueError('Kernel was NULL')
-    cdef ScalarKernel kernel = ScalarKernel.__new__(ScalarKernel)
-    kernel.init(c_kernel)
-    return kernel
-
-
-cdef wrap_vector_kernel(const CVectorKernel* c_kernel):
-    if c_kernel == NULL:
-        raise ValueError('Kernel was NULL')
-    cdef VectorKernel kernel = VectorKernel.__new__(VectorKernel)
-    kernel.init(c_kernel)
-    return kernel
-
-
-cdef wrap_scalar_aggregate_kernel(const CScalarAggregateKernel* c_kernel):
-    if c_kernel == NULL:
-        raise ValueError('Kernel was NULL')
-    cdef ScalarAggregateKernel kernel = (
-        ScalarAggregateKernel.__new__(ScalarAggregateKernel)
-    )
-    kernel.init(c_kernel)
-    return kernel
-
-
-cdef wrap_hash_aggregate_kernel(const CHashAggregateKernel* c_kernel):
-    if c_kernel == NULL:
-        raise ValueError('Kernel was NULL')
-    cdef HashAggregateKernel kernel = (
-        HashAggregateKernel.__new__(HashAggregateKernel)
-    )
-    kernel.init(c_kernel)
-    return kernel
-
-
-cdef class Kernel(_Weakrefable):
-    """
-    A kernel object.
-
-    Kernels handle the execution of a Function for a certain signature.
-    """
-
-    def __init__(self):
-        raise TypeError("Do not call {}'s constructor directly"
-                        .format(self.__class__.__name__))
-
-
-cdef class ScalarKernel(Kernel):
-    cdef:
-        const CScalarKernel* kernel
-
-    cdef void init(self, const CScalarKernel* kernel) except *:
-        self.kernel = kernel
-
-    def __repr__(self):
-        return ("ScalarKernel<{}>"
-                .format(frombytes(self.kernel.signature.get().ToString())))
-
-
-cdef class VectorKernel(Kernel):
-    cdef:
-        const CVectorKernel* kernel
-
-    cdef void init(self, const CVectorKernel* kernel) except *:
-        self.kernel = kernel
-
-    def __repr__(self):
-        return ("VectorKernel<{}>"
-                .format(frombytes(self.kernel.signature.get().ToString())))
-
-
-cdef class ScalarAggregateKernel(Kernel):
-    cdef:
-        const CScalarAggregateKernel* kernel
-
-    cdef void init(self, const CScalarAggregateKernel* kernel) except *:
-        self.kernel = kernel
-
-    def __repr__(self):
-        return ("ScalarAggregateKernel<{}>"
-                .format(frombytes(self.kernel.signature.get().ToString())))
-
-
-cdef class HashAggregateKernel(Kernel):
-    cdef:
-        const CHashAggregateKernel* kernel
-
-    cdef void init(self, const CHashAggregateKernel* kernel) except *:
-        self.kernel = kernel
-
-    def __repr__(self):
-        return ("HashAggregateKernel<{}>"
-                .format(frombytes(self.kernel.signature.get().ToString())))
-
-
-FunctionDoc = namedtuple(
-    "FunctionDoc",
-    ("summary", "description", "arg_names", "options_class"))
-
-
-cdef class Function(_Weakrefable):
-    """
-    A compute function.
-
-    A function implements a certain logical computation over a range of
-    possible input signatures.  Each signature accepts a range of input
-    types and is implemented by a given Kernel.
-
-    Functions can be of different kinds:
-
-    * "scalar" functions apply an item-wise computation over all items
-      of their inputs.  Each item in the output only depends on the values
-      of the inputs at the same position.  Examples: addition, comparisons,
-      string predicates...
-
-    * "vector" functions apply a collection-wise computation, such that
-      each item in the output may depend on the values of several items
-      in each input.  Examples: dictionary encoding, sorting, extracting
-      unique values...
-
-    * "scalar_aggregate" functions reduce the dimensionality of the inputs by
-      applying a reduction function.  Examples: sum, min_max, mode...
-
-    * "hash_aggregate" functions apply a reduction function to an input
-      subdivided by grouping criteria.  They may not be directly called.
-      Examples: hash_sum, hash_min_max...
-
-    * "meta" functions dispatch to other functions.
-    """
-    cdef:
-        shared_ptr[CFunction] sp_func
-        CFunction* base_func
-
-    def __init__(self):
-        raise TypeError("Do not call {}'s constructor directly"
-                        .format(self.__class__.__name__))
-
-    cdef void init(self, const shared_ptr[CFunction]& sp_func) except *:
-        self.sp_func = sp_func
-        self.base_func = sp_func.get()
-
-    def __repr__(self):
-        return ("arrow.compute.Function<name={}, kind={}, "
-                "arity={}, num_kernels={}>"
-                ).format(self.name, self.kind, self.arity, self.num_kernels)
-
-    def __reduce__(self):
-        # Reduction uses the global registry
-        return get_function, (self.name,)
-
-    @property
-    def name(self):
-        """
-        The function name.
-        """
-        return frombytes(self.base_func.name())
-
-    @property
-    def arity(self):
-        """
-        The function arity.
-
-        If Ellipsis (i.e. `...`) is returned, the function takes a variable
-        number of arguments.
-        """
-        cdef CArity arity = self.base_func.arity()
-        if arity.is_varargs:
-            return ...
-        else:
-            return arity.num_args
-
-    @property
-    def kind(self):
-        """
-        The function kind.
-        """
-        cdef FunctionKind c_kind = self.base_func.kind()
-        if c_kind == FunctionKind_SCALAR:
-            return 'scalar'
-        elif c_kind == FunctionKind_VECTOR:
-            return 'vector'
-        elif c_kind == FunctionKind_SCALAR_AGGREGATE:
-            return 'scalar_aggregate'
-        elif c_kind == FunctionKind_HASH_AGGREGATE:
-            return 'hash_aggregate'
-        elif c_kind == FunctionKind_META:
-            return 'meta'
-        else:
-            raise NotImplementedError("Unknown Function::Kind")
-
-    @property
-    def _doc(self):
-        """
-        The C++-like function documentation (for internal use).
-        """
-        cdef CFunctionDoc c_doc = self.base_func.doc()
-
-        return FunctionDoc(frombytes(c_doc.summary),
-                           frombytes(c_doc.description),
-                           [frombytes(s) for s in c_doc.arg_names],
-                           frombytes(c_doc.options_class))
-
-    @property
-    def num_kernels(self):
-        """
-        The number of kernels implementing this function.
-        """
-        return self.base_func.num_kernels()
-
-    def call(self, args, FunctionOptions options=None,
-             MemoryPool memory_pool=None):
-        """
-        Call the function on the given arguments.
-        """
-        cdef:
-            const CFunctionOptions* c_options = NULL
-            CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
-            CExecContext c_exec_ctx = CExecContext(pool)
-            vector[CDatum] c_args
-            CDatum result
-
-        _pack_compute_args(args, &c_args)
-
-        if options is not None:
-            c_options = options.get_options()
-
-        with nogil:
-            result = GetResultValue(self.base_func.Execute(c_args,
-                                                           c_options,
-                                                           &c_exec_ctx))
-
-        return wrap_datum(result)
-
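A short usage sketch of the introspection surface above (a sketch against the pyarrow.compute API shipped with this module; exact kernel counts vary by version):

    import pyarrow.compute as pc

    # Look up a registered function and inspect its metadata.
    add = pc.get_function("add")
    print(add.kind)         # 'scalar'
    print(add.arity)        # 2
    print(add.num_kernels)  # number of type-specialized kernels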
-
-cdef class ScalarFunction(Function):
-    cdef:
-        const CScalarFunction* func
-
-    cdef void init(self, const shared_ptr[CFunction]& sp_func) except *:
-        Function.init(self, sp_func)
-        self.func = <const CScalarFunction*> sp_func.get()
-
-    @property
-    def kernels(self):
-        """
-        The kernels implementing this function.
-        """
-        cdef vector[const CScalarKernel*] kernels = self.func.kernels()
-        return [wrap_scalar_kernel(k) for k in kernels]
-
-
-cdef class VectorFunction(Function):
-    cdef:
-        const CVectorFunction* func
-
-    cdef void init(self, const shared_ptr[CFunction]& sp_func) except *:
-        Function.init(self, sp_func)
-        self.func = <const CVectorFunction*> sp_func.get()
-
-    @property
-    def kernels(self):
-        """
-        The kernels implementing this function.
-        """
-        cdef vector[const CVectorKernel*] kernels = self.func.kernels()
-        return [wrap_vector_kernel(k) for k in kernels]
-
-
-cdef class ScalarAggregateFunction(Function):
-    cdef:
-        const CScalarAggregateFunction* func
-
-    cdef void init(self, const shared_ptr[CFunction]& sp_func) except *:
-        Function.init(self, sp_func)
-        self.func = <const CScalarAggregateFunction*> sp_func.get()
-
-    @property
-    def kernels(self):
-        """
-        The kernels implementing this function.
-        """
-        cdef vector[const CScalarAggregateKernel*] kernels = (
-            self.func.kernels()
-        )
-        return [wrap_scalar_aggregate_kernel(k) for k in kernels]
-
-
-cdef class HashAggregateFunction(Function):
-    cdef:
-        const CHashAggregateFunction* func
-
-    cdef void init(self, const shared_ptr[CFunction]& sp_func) except *:
-        Function.init(self, sp_func)
-        self.func = <const CHashAggregateFunction*> sp_func.get()
-
-    @property
-    def kernels(self):
-        """
-        The kernels implementing this function.
-        """
-        cdef vector[const CHashAggregateKernel*] kernels = (
-            self.func.kernels()
-        )
-        return [wrap_hash_aggregate_kernel(k) for k in kernels]
-
-
-cdef class MetaFunction(Function):
-    cdef:
-        const CMetaFunction* func
-
-    cdef void init(self, const shared_ptr[CFunction]& sp_func) except *:
-        Function.init(self, sp_func)
-        self.func = <const CMetaFunction*> sp_func.get()
-
-    # Since num_kernels is exposed, also expose a kernels property
-
-    @property
-    def kernels(self):
-        """
-        The kernels implementing this function.
-        """
-        return []
-
-
-cdef _pack_compute_args(object values, vector[CDatum]* out):
-    for val in values:
-        if isinstance(val, (list, np.ndarray)):
-            val = lib.asarray(val)
-
-        if isinstance(val, Array):
-            out.push_back(CDatum((<Array> val).sp_array))
-            continue
-        elif isinstance(val, ChunkedArray):
-            out.push_back(CDatum((<ChunkedArray> val).sp_chunked_array))
-            continue
-        elif isinstance(val, Scalar):
-            out.push_back(CDatum((<Scalar> val).unwrap()))
-            continue
-        elif isinstance(val, RecordBatch):
-            out.push_back(CDatum((<RecordBatch> val).sp_batch))
-            continue
-        elif isinstance(val, Table):
-            out.push_back(CDatum((<Table> val).sp_table))
-            continue
-        else:
-            # Is it a Python scalar?
-            try:
-                scal = lib.scalar(val)
-            except Exception:
-                # Raise dedicated error below
-                pass
-            else:
-                out.push_back(CDatum((<Scalar> scal).unwrap()))
-                continue
-
-        raise TypeError("Got unexpected argument type {} "
-                        "for compute function".format(type(val)))
-
-
-cdef class FunctionRegistry(_Weakrefable):
-    cdef:
-        CFunctionRegistry* registry
-
-    def __init__(self):
-        self.registry = GetFunctionRegistry()
-
-    def list_functions(self):
-        """
-        Return all function names in the registry.
-        """
-        cdef vector[c_string] names = self.registry.GetFunctionNames()
-        return [frombytes(name) for name in names]
-
-    def get_function(self, name):
-        """
-        Look up a function by name in the registry.
-        """
-        cdef:
-            c_string c_name = tobytes(name)
-            shared_ptr[CFunction] func
-        with nogil:
-            func = GetResultValue(self.registry.GetFunction(c_name))
-        return wrap_function(func)
-
-
-cdef FunctionRegistry _global_func_registry = FunctionRegistry()
-
-
-def function_registry():
-    return _global_func_registry
-
-
-def get_function(name):
-    """
-    Get a function by name.
-
-    The function is looked up in the global registry
-    (as returned by `function_registry()`).
-    """
-    return _global_func_registry.get_function(name)
-
-
-def list_functions():
-    """
-    Return all function names in the global registry.
-    """
-    return _global_func_registry.list_functions()
-
-
-def call_function(name, args, options=None, memory_pool=None):
-    """
-    Call a named function.
-
-    The function is looked up in the global registry
-    (as returned by `function_registry()`).
-    """
-    func = _global_func_registry.get_function(name)
-    return func.call(args, options=options, memory_pool=memory_pool)
-
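For illustration, calling registry functions by name might look as follows (a sketch; the same operations are also exposed as convenience wrappers in pyarrow.compute):

    import pyarrow as pa
    import pyarrow.compute as pc

    a = pa.array([1, 2, 3])
    b = pa.array([10, 20, 30])

    pc.call_function("add", [a, b])   # element-wise scalar function -> [11, 22, 33]
    pc.call_function("sum", [a])      # scalar aggregate -> Int64Scalar 6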
-
-cdef class FunctionOptions(_Weakrefable):
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        raise NotImplementedError("Unimplemented base options")
-
-
-# NOTE:
-# To properly expose the constructor signature of FunctionOptions
-# subclasses, we use two levels of inheritance:
-# 1. a C extension class that implements option validation and setting
-#    (won't expose function signatures because of
-#     https://github.com/cython/cython/issues/3873)
-# 2. a Python derived class that implements the constructor
-
-cdef class _CastOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CCastOptions] options
-
-    __slots__ = ()  # avoid mistakenly creating attributes
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.options.get()
-
-    def _set_options(self, DataType target_type, allow_int_overflow,
-                     allow_time_truncate, allow_time_overflow,
-                     allow_float_truncate, allow_invalid_utf8):
-        self.options.reset(new CCastOptions())
-        self._set_type(target_type)
-        if allow_int_overflow is not None:
-            self.allow_int_overflow = allow_int_overflow
-        if allow_time_truncate is not None:
-            self.allow_time_truncate = allow_time_truncate
-        if allow_time_overflow is not None:
-            self.allow_time_overflow = allow_time_overflow
-        if allow_float_truncate is not None:
-            self.allow_float_truncate = allow_float_truncate
-        if allow_invalid_utf8 is not None:
-            self.allow_invalid_utf8 = allow_invalid_utf8
-
-    def _set_type(self, target_type=None):
-        if target_type is not None:
-            deref(self.options).to_type = (
-                (<DataType> ensure_type(target_type)).sp_type
-            )
-
-    def _set_safe(self):
-        self.options.reset(new CCastOptions(CCastOptions.Safe()))
-
-    def _set_unsafe(self):
-        self.options.reset(new CCastOptions(CCastOptions.Unsafe()))
-
-    def is_safe(self):
-        return not (
-            deref(self.options).allow_int_overflow or
-            deref(self.options).allow_time_truncate or
-            deref(self.options).allow_time_overflow or
-            deref(self.options).allow_float_truncate or
-            deref(self.options).allow_invalid_utf8
-        )
-
-    @property
-    def allow_int_overflow(self):
-        return deref(self.options).allow_int_overflow
-
-    @allow_int_overflow.setter
-    def allow_int_overflow(self, bint flag):
-        deref(self.options).allow_int_overflow = flag
-
-    @property
-    def allow_time_truncate(self):
-        return deref(self.options).allow_time_truncate
-
-    @allow_time_truncate.setter
-    def allow_time_truncate(self, bint flag):
-        deref(self.options).allow_time_truncate = flag
-
-    @property
-    def allow_time_overflow(self):
-        return deref(self.options).allow_time_overflow
-
-    @allow_time_overflow.setter
-    def allow_time_overflow(self, bint flag):
-        deref(self.options).allow_time_overflow = flag
-
-    @property
-    def allow_float_truncate(self):
-        return deref(self.options).allow_float_truncate
-
-    @allow_float_truncate.setter
-    def allow_float_truncate(self, bint flag):
-        deref(self.options).allow_float_truncate = flag
-
-    @property
-    def allow_invalid_utf8(self):
-        return deref(self.options).allow_invalid_utf8
-
-    @allow_invalid_utf8.setter
-    def allow_invalid_utf8(self, bint flag):
-        deref(self.options).allow_invalid_utf8 = flag
-
-
-class CastOptions(_CastOptions):
-
-    def __init__(self, target_type=None, *, allow_int_overflow=None,
-                 allow_time_truncate=None, allow_time_overflow=None,
-                 allow_float_truncate=None, allow_invalid_utf8=None):
-        self._set_options(target_type, allow_int_overflow,
-                          allow_time_truncate, allow_time_overflow,
-                          allow_float_truncate, allow_invalid_utf8)
-
-    @staticmethod
-    def safe(target_type=None):
-        self = CastOptions()
-        self._set_safe()
-        self._set_type(target_type)
-        return self
-
-    @staticmethod
-    def unsafe(target_type=None):
-        self = CastOptions()
-        self._set_unsafe()
-        self._set_type(target_type)
-        return self
-
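A sketch of how CastOptions interacts with the "cast" meta function (example values chosen only for illustration):

    import pyarrow as pa
    import pyarrow.compute as pc

    arr = pa.array([1.0, 2.5, 3.7])

    # Safe casting rejects the float -> int truncation:
    # pc.call_function("cast", [arr], options=pc.CastOptions.safe(pa.int64()))  # raises ArrowInvalid
    # Unsafe casting allows it and truncates the fractional part:
    pc.call_function("cast", [arr], options=pc.CastOptions.unsafe(pa.int64()))  # -> [1, 2, 3]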
-
-cdef class _MatchSubstringOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CMatchSubstringOptions] match_substring_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.match_substring_options.get()
-
-    def _set_options(self, pattern):
-        self.match_substring_options.reset(
-            new CMatchSubstringOptions(tobytes(pattern)))
-
-
-class MatchSubstringOptions(_MatchSubstringOptions):
-    def __init__(self, pattern):
-        self._set_options(pattern)
-
-
-cdef class _TrimOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CTrimOptions] trim_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.trim_options.get()
-
-    def _set_options(self, characters):
-        self.trim_options.reset(
-            new CTrimOptions(tobytes(characters)))
-
-
-class TrimOptions(_TrimOptions):
-    def __init__(self, characters):
-        self._set_options(characters)
-
-
-cdef class _ReplaceSubstringOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CReplaceSubstringOptions] replace_substring_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.replace_substring_options.get()
-
-    def _set_options(self, pattern, replacement, max_replacements):
-        self.replace_substring_options.reset(
-            new CReplaceSubstringOptions(tobytes(pattern),
-                                         tobytes(replacement),
-                                         max_replacements)
-        )
-
-
-class ReplaceSubstringOptions(_ReplaceSubstringOptions):
-    def __init__(self, pattern, replacement, max_replacements=-1):
-        self._set_options(pattern, replacement, max_replacements)
-
-
-cdef class _FilterOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CFilterOptions] filter_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.filter_options.get()
-
-    def _set_options(self, null_selection_behavior):
-        if null_selection_behavior == 'drop':
-            self.filter_options.reset(
-                new CFilterOptions(CFilterNullSelectionBehavior_DROP))
-        elif null_selection_behavior == 'emit_null':
-            self.filter_options.reset(
-                new CFilterOptions(CFilterNullSelectionBehavior_EMIT_NULL))
-        else:
-            raise ValueError(
-                '"{}" is not a valid null_selection_behavior'
-                .format(null_selection_behavior))
-
-
-class FilterOptions(_FilterOptions):
-    def __init__(self, null_selection_behavior='drop'):
-        self._set_options(null_selection_behavior)
-
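For illustration, the effect of null_selection_behavior when the filter mask itself contains nulls (a sketch):

    import pyarrow as pa
    import pyarrow.compute as pc

    arr = pa.array([1, 2, 3])
    mask = pa.array([True, None, True])

    # Default 'drop' removes the slot whose mask value is null: [1, 3]
    pc.call_function("filter", [arr, mask])
    # 'emit_null' keeps it as a null value: [1, null, 3]
    pc.call_function("filter", [arr, mask], options=pc.FilterOptions("emit_null"))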
-
-cdef class _DictionaryEncodeOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CDictionaryEncodeOptions] dictionary_encode_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.dictionary_encode_options.get()
-
-    def _set_options(self, null_encoding_behavior):
-        if null_encoding_behavior == 'encode':
-            self.dictionary_encode_options.reset(
-                new CDictionaryEncodeOptions(
-                    CDictionaryEncodeNullEncodingBehavior_ENCODE))
-        elif null_encoding_behavior == 'mask':
-            self.dictionary_encode_options.reset(
-                new CDictionaryEncodeOptions(
-                    CDictionaryEncodeNullEncodingBehavior_MASK))
-        else:
-            raise ValueError('"{}" is not a valid null_encoding_behavior'
-                             .format(null_encoding_behavior))
-
-
-class DictionaryEncodeOptions(_DictionaryEncodeOptions):
-    def __init__(self, null_encoding_behavior='mask'):
-        self._set_options(null_encoding_behavior)
-
-
-cdef class _TakeOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CTakeOptions] take_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.take_options.get()
-
-    def _set_options(self, boundscheck):
-        self.take_options.reset(new CTakeOptions(boundscheck))
-
-
-class TakeOptions(_TakeOptions):
-    def __init__(self, *, boundscheck=True):
-        self._set_options(boundscheck)
-
-
-cdef class _PartitionNthOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CPartitionNthOptions] partition_nth_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.partition_nth_options.get()
-
-    def _set_options(self, int64_t pivot):
-        self.partition_nth_options.reset(new CPartitionNthOptions(pivot))
-
-
-class PartitionNthOptions(_PartitionNthOptions):
-    def __init__(self, int64_t pivot):
-        self._set_options(pivot)
-
-
-cdef class _ProjectOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CProjectOptions] project_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.project_options.get()
-
-    def _set_options(self, field_names):
-        cdef:
-            vector[c_string] c_field_names
-        for n in field_names:
-            c_field_names.push_back(tobytes(n))
-        self.project_options.reset(new CProjectOptions(c_field_names))
-
-
-class ProjectOptions(_ProjectOptions):
-    def __init__(self, field_names):
-        self._set_options(field_names)
-
-
-cdef class _MinMaxOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CMinMaxOptions] min_max_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.min_max_options.get()
-
-    def _set_options(self, null_handling):
-        if null_handling == 'skip':
-            self.min_max_options.reset(
-                new CMinMaxOptions(CMinMaxMode_SKIP))
-        elif null_handling == 'emit_null':
-            self.min_max_options.reset(
-                new CMinMaxOptions(CMinMaxMode_EMIT_NULL))
-        else:
-            raise ValueError(
-                '{!r} is not a valid null_handling'
-                .format(null_handling))
-
-
-class MinMaxOptions(_MinMaxOptions):
-    def __init__(self, null_handling='skip'):
-        self._set_options(null_handling)
-
-
-cdef class _CountOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CCountOptions] count_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.count_options.get()
-
-    def _set_options(self, count_mode):
-        if count_mode == 'count_null':
-            self.count_options.reset(
-                new CCountOptions(CCountMode_COUNT_NULL))
-        elif count_mode == 'count_non_null':
-            self.count_options.reset(
-                new CCountOptions(CCountMode_COUNT_NON_NULL))
-        else:
-            raise ValueError(
-                '{!r} is not a valid count_mode'
-                .format(count_mode))
-
-
-class CountOptions(_CountOptions):
-    def __init__(self, count_mode='count_non_null'):
-        self._set_options(count_mode)
-
-
-cdef class _ModeOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CModeOptions] mode_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.mode_options.get()
-
-    def _set_options(self, n):
-        self.mode_options.reset(new CModeOptions(n))
-
-
-class ModeOptions(_ModeOptions):
-    def __init__(self, n=1):
-        self._set_options(n)
-
-
-cdef class _SetLookupOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CSetLookupOptions] set_lookup_options
-        unique_ptr[CDatum] valset
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.set_lookup_options.get()
-
-    def _set_options(self, value_set, c_bool skip_nulls):
-        if isinstance(value_set, Array):
-            self.valset.reset(new CDatum((<Array> value_set).sp_array))
-        elif isinstance(value_set, ChunkedArray):
-            self.valset.reset(
-                new CDatum((<ChunkedArray> value_set).sp_chunked_array)
-            )
-        elif isinstance(value_set, Scalar):
-            self.valset.reset(new CDatum((<Scalar> value_set).unwrap()))
-        else:
-            raise ValueError('"{}" is not a valid value_set'.format(value_set))
-
-        self.set_lookup_options.reset(
-            new CSetLookupOptions(deref(self.valset), skip_nulls)
-        )
-
-
-class SetLookupOptions(_SetLookupOptions):
-    def __init__(self, *, value_set, skip_nulls=False):
-        self._set_options(value_set, skip_nulls)
-
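A sketch of SetLookupOptions used with the "is_in" function (value_set is a keyword-only argument here):

    import pyarrow as pa
    import pyarrow.compute as pc

    arr = pa.array(["a", "b", "c"])
    opts = pc.SetLookupOptions(value_set=pa.array(["a", "c"]))

    pc.call_function("is_in", [arr], options=opts)  # -> BooleanArray [true, false, true]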
-
-cdef class _StrptimeOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CStrptimeOptions] strptime_options
-        TimeUnit time_unit
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.strptime_options.get()
-
-    def _set_options(self, format, unit):
-        if unit == 's':
-            self.time_unit = TimeUnit_SECOND
-        elif unit == 'ms':
-            self.time_unit = TimeUnit_MILLI
-        elif unit == 'us':
-            self.time_unit = TimeUnit_MICRO
-        elif unit == 'ns':
-            self.time_unit = TimeUnit_NANO
-        else:
-            raise ValueError('"{}" is not a valid time unit'.format(unit))
-
-        self.strptime_options.reset(
-            new CStrptimeOptions(tobytes(format), self.time_unit)
-        )
-
-
-class StrptimeOptions(_StrptimeOptions):
-    def __init__(self, format, unit):
-        self._set_options(format, unit)
-
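A sketch of StrptimeOptions driving the "strptime" string-to-timestamp conversion (format string and unit chosen only for illustration):

    import pyarrow as pa
    import pyarrow.compute as pc

    raw = pa.array(["2021-04-18", "2021-04-19"])
    opts = pc.StrptimeOptions("%Y-%m-%d", "s")

    pc.call_function("strptime", [raw], options=opts)  # -> timestamp[s] array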
-
-cdef class _VarianceOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CVarianceOptions] variance_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.variance_options.get()
-
-    def _set_options(self, ddof):
-        self.variance_options.reset(new CVarianceOptions(ddof))
-
-
-class VarianceOptions(_VarianceOptions):
-    def __init__(self, *, ddof=0):
-        self._set_options(ddof)
-
-
-cdef class _SplitOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CSplitOptions] split_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.split_options.get()
-
-    def _set_options(self, max_splits, reverse):
-        self.split_options.reset(
-            new CSplitOptions(max_splits, reverse))
-
-
-class SplitOptions(_SplitOptions):
-    def __init__(self, *, max_splits=-1, reverse=False):
-        self._set_options(max_splits, reverse)
-
-
-cdef class _SplitPatternOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CSplitPatternOptions] split_pattern_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.split_pattern_options.get()
-
-    def _set_options(self, pattern, max_splits, reverse):
-        self.split_pattern_options.reset(
-            new CSplitPatternOptions(tobytes(pattern), max_splits, reverse))
-
-
-class SplitPatternOptions(_SplitPatternOptions):
-    def __init__(self, *, pattern, max_splits=-1, reverse=False):
-        self._set_options(pattern, max_splits, reverse)
-
-
-cdef class _ArraySortOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CArraySortOptions] array_sort_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.array_sort_options.get()
-
-    def _set_options(self, order):
-        if order == "ascending":
-            self.array_sort_options.reset(
-                new CArraySortOptions(CSortOrder_Ascending))
-        elif order == "descending":
-            self.array_sort_options.reset(
-                new CArraySortOptions(CSortOrder_Descending))
-        else:
-            raise ValueError(
-                "{!r} is not a valid order".format(order)
-            )
-
-
-class ArraySortOptions(_ArraySortOptions):
-    def __init__(self, *, order='ascending'):
-        self._set_options(order)
-
-
-cdef class _SortOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CSortOptions] sort_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.sort_options.get()
-
-    def _set_options(self, sort_keys):
-        cdef:
-            vector[CSortKey] c_sort_keys
-            c_string c_name
-            CSortOrder c_order
-
-        for name, order in sort_keys:
-            if order == "ascending":
-                c_order = CSortOrder_Ascending
-            elif order == "descending":
-                c_order = CSortOrder_Descending
-            else:
-                raise ValueError(
-                    "{!r} is not a valid order".format(order)
-                )
-            c_name = tobytes(name)
-            c_sort_keys.push_back(CSortKey(c_name, c_order))
-
-        self.sort_options.reset(new CSortOptions(c_sort_keys))
-
-
-class SortOptions(_SortOptions):
-    def __init__(self, sort_keys=None):
-        if sort_keys is None:
-            sort_keys = []
-        self._set_options(sort_keys)
-
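A sketch of multi-key sorting through "sort_indices" with SortOptions (the column names are hypothetical):

    import pyarrow as pa
    import pyarrow.compute as pc

    table = pa.table({"a": [2, 1, 2], "b": [5, 6, 4]})
    opts = pc.SortOptions(sort_keys=[("a", "ascending"), ("b", "descending")])

    # Compute the permutation, then apply it to reorder the table.
    indices = pc.call_function("sort_indices", [table], options=opts)
    table.take(indices)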
-
-cdef class _QuantileOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CQuantileOptions] quantile_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.quantile_options.get()
-
-    def _set_options(self, quantiles, interp):
-        interp_dict = {
-            'linear': CQuantileInterp_LINEAR,
-            'lower': CQuantileInterp_LOWER,
-            'higher': CQuantileInterp_HIGHER,
-            'nearest': CQuantileInterp_NEAREST,
-            'midpoint': CQuantileInterp_MIDPOINT,
-        }
-        if interp not in interp_dict:
-            raise ValueError(
-                '{!r} is not a valid interpolation'
-                .format(interp))
-        self.quantile_options.reset(
-            new CQuantileOptions(quantiles, interp_dict[interp]))
-
-
-class QuantileOptions(_QuantileOptions):
-    def __init__(self, *, q=0.5, interpolation='linear'):
-        if not isinstance(q, (list, tuple, np.ndarray)):
-            q = [q]
-        self._set_options(q, interpolation)
-
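For illustration, QuantileOptions with the "quantile" aggregate (a sketch; the reported value depends on the interpolation rule):

    import pyarrow as pa
    import pyarrow.compute as pc

    arr = pa.array([1, 2, 3, 4])
    opts = pc.QuantileOptions(q=0.5, interpolation="linear")

    pc.call_function("quantile", [arr], options=opts)  # -> [2.5] with linear interpolation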
-
-cdef class _TDigestOptions(FunctionOptions):
-    cdef:
-        unique_ptr[CTDigestOptions] tdigest_options
-
-    cdef const CFunctionOptions* get_options(self) except NULL:
-        return self.tdigest_options.get()
-
-    def _set_options(self, quantiles, delta, buffer_size):
-        self.tdigest_options.reset(
-            new CTDigestOptions(quantiles, delta, buffer_size))
-
-
-class TDigestOptions(_TDigestOptions):
-    def __init__(self, *, q=0.5, delta=100, buffer_size=500):
-        if not isinstance(q, (list, tuple, np.ndarray)):
-            q = [q]
-        self._set_options(q, delta, buffer_size)
diff --git a/python/pyarrow/_csv.pxd b/python/pyarrow/_csv.pxd
deleted file mode 100644
index f8e12f1..0000000
--- a/python/pyarrow/_csv.pxd
+++ /dev/null
@@ -1,46 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: language_level = 3
-
-from pyarrow.includes.libarrow cimport *
-from pyarrow.lib cimport _Weakrefable
-
-
-cdef class ConvertOptions(_Weakrefable):
-    cdef:
-        CCSVConvertOptions options
-
-    @staticmethod
-    cdef ConvertOptions wrap(CCSVConvertOptions options)
-
-
-cdef class ParseOptions(_Weakrefable):
-    cdef:
-        CCSVParseOptions options
-
-    @staticmethod
-    cdef ParseOptions wrap(CCSVParseOptions options)
-
-
-cdef class ReadOptions(_Weakrefable):
-    cdef:
-        CCSVReadOptions options
-        public object encoding
-
-    @staticmethod
-    cdef ReadOptions wrap(CCSVReadOptions options)
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
deleted file mode 100644
index a98160c..0000000
--- a/python/pyarrow/_csv.pyx
+++ /dev/null
@@ -1,952 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-# cython: language_level = 3
-
-from cython.operator cimport dereference as deref
-
-import codecs
-from collections.abc import Mapping
-
-from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport *
-from pyarrow.lib cimport (check_status, Field, MemoryPool, Schema,
-                          RecordBatchReader, ensure_type,
-                          maybe_unbox_memory_pool, get_input_stream,
-                          get_writer, native_transcoding_input_stream,
-                          pyarrow_unwrap_batch, pyarrow_unwrap_table,
-                          pyarrow_wrap_schema, pyarrow_wrap_table,
-                          pyarrow_wrap_data_type, pyarrow_unwrap_data_type,
-                          Table, RecordBatch, StopToken)
-from pyarrow.lib import frombytes, tobytes, SignalStopHandler
-from pyarrow.util import _stringify_path
-
-
-cdef unsigned char _single_char(s) except 0:
-    val = ord(s)
-    if val == 0 or val > 127:
-        raise ValueError("Expecting an ASCII character")
-    return <unsigned char> val
-
-
-cdef class ReadOptions(_Weakrefable):
-    """
-    Options for reading CSV files.
-
-    Parameters
-    ----------
-    use_threads : bool, optional (default True)
-        Whether to use multiple threads to accelerate reading
-    block_size : int, optional
-        How many bytes to process at a time from the input stream.
-        This will determine multi-threading granularity as well as
-        the size of individual chunks in the Table.
-    skip_rows: int, optional (default 0)
-        The number of rows to skip before the column names (if any)
-        and the CSV data.
-    column_names: list, optional
-        The column names of the target table.  If empty, fall back on
-        `autogenerate_column_names`.
-    autogenerate_column_names: bool, optional (default False)
-        Whether to autogenerate column names if `column_names` is empty.
-        If true, column names will be of the form "f0", "f1"...
-        If false, column names will be read from the first CSV row
-        after `skip_rows`.
-    encoding: str, optional (default 'utf8')
-        The character encoding of the CSV data.  Columns that cannot
-        be decoded using this encoding can still be read as Binary.
-    """
-
-    # Avoid mistakenly creating attributes
-    __slots__ = ()
-
-    def __init__(self, *, use_threads=None, block_size=None, skip_rows=None,
-                 column_names=None, autogenerate_column_names=None,
-                 encoding='utf8'):
-        self.options = CCSVReadOptions.Defaults()
-        if use_threads is not None:
-            self.use_threads = use_threads
-        if block_size is not None:
-            self.block_size = block_size
-        if skip_rows is not None:
-            self.skip_rows = skip_rows
-        if column_names is not None:
-            self.column_names = column_names
-        if autogenerate_column_names is not None:
-            self.autogenerate_column_names = autogenerate_column_names
-        # Python-specific option
-        self.encoding = encoding
-
-    @property
-    def use_threads(self):
-        """
-        Whether to use multiple threads to accelerate reading.
-        """
-        return self.options.use_threads
-
-    @use_threads.setter
-    def use_threads(self, value):
-        self.options.use_threads = value
-
-    @property
-    def block_size(self):
-        """
-        How many bytes to process at a time from the input stream.
-        This will determine multi-threading granularity as well as
-        the size of individual chunks in the Table.
-        """
-        return self.options.block_size
-
-    @block_size.setter
-    def block_size(self, value):
-        self.options.block_size = value
-
-    @property
-    def skip_rows(self):
-        """
-        The number of rows to skip before the column names (if any)
-        and the CSV data.
-        """
-        return self.options.skip_rows
-
-    @skip_rows.setter
-    def skip_rows(self, value):
-        self.options.skip_rows = value
-
-    @property
-    def column_names(self):
-        """
-        The column names of the target table.  If empty, fall back on
-        `autogenerate_column_names`.
-        """
-        return [frombytes(s) for s in self.options.column_names]
-
-    @column_names.setter
-    def column_names(self, value):
-        self.options.column_names.clear()
-        for item in value:
-            self.options.column_names.push_back(tobytes(item))
-
-    @property
-    def autogenerate_column_names(self):
-        """
-        Whether to autogenerate column names if `column_names` is empty.
-        If true, column names will be of the form "f0", "f1"...
-        If false, column names will be read from the first CSV row
-        after `skip_rows`.
-        """
-        return self.options.autogenerate_column_names
-
-    @autogenerate_column_names.setter
-    def autogenerate_column_names(self, value):
-        self.options.autogenerate_column_names = value
-
-    def equals(self, ReadOptions other):
-        return (
-            self.use_threads == other.use_threads and
-            self.block_size == other.block_size and
-            self.skip_rows == other.skip_rows and
-            self.column_names == other.column_names and
-            self.autogenerate_column_names ==
-            other.autogenerate_column_names and
-            self.encoding == other.encoding
-        )
-
-    @staticmethod
-    cdef ReadOptions wrap(CCSVReadOptions options):
-        out = ReadOptions()
-        out.options = options
-        out.encoding = 'utf8'  # No way to know this
-        return out
-
-    def __getstate__(self):
-        return (self.use_threads, self.block_size, self.skip_rows,
-                self.column_names, self.autogenerate_column_names,
-                self.encoding)
-
-    def __setstate__(self, state):
-        (self.use_threads, self.block_size, self.skip_rows,
-         self.column_names, self.autogenerate_column_names,
-         self.encoding) = state
-
-    def __eq__(self, other):
-        try:
-            return self.equals(other)
-        except TypeError:
-            return False
-
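A sketch of constructing ReadOptions for a headerless file (the column names and block size are hypothetical):

    from pyarrow import csv

    read_opts = csv.ReadOptions(
        column_names=["id", "value"],   # the file has no header row
        block_size=1 << 20,             # read and convert in 1 MiB chunks
    )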
-
-cdef class ParseOptions(_Weakrefable):
-    """
-    Options for parsing CSV files.
-
-    Parameters
-    ----------
-    delimiter: 1-character string, optional (default ',')
-        The character delimiting individual cells in the CSV data.
-    quote_char: 1-character string or False, optional (default '"')
-        The character used optionally for quoting CSV values
-        (False if quoting is not allowed).
-    double_quote: bool, optional (default True)
-        Whether two quotes in a quoted CSV value denote a single quote
-        in the data.
-    escape_char: 1-character string or False, optional (default False)
-        The character used optionally for escaping special characters
-        (False if escaping is not allowed).
-    newlines_in_values: bool, optional (default False)
-        Whether newline characters are allowed in CSV values.
-        Setting this to True reduces the performance of multi-threaded
-        CSV reading.
-    ignore_empty_lines: bool, optional (default True)
-        Whether empty lines are ignored in CSV input.
-        If False, an empty line is interpreted as containing a single empty
-        value (assuming a one-column CSV file).
-    """
-    __slots__ = ()
-
-    def __init__(self, *, delimiter=None, quote_char=None, double_quote=None,
-                 escape_char=None, newlines_in_values=None,
-                 ignore_empty_lines=None):
-        self.options = CCSVParseOptions.Defaults()
-        if delimiter is not None:
-            self.delimiter = delimiter
-        if quote_char is not None:
-            self.quote_char = quote_char
-        if double_quote is not None:
-            self.double_quote = double_quote
-        if escape_char is not None:
-            self.escape_char = escape_char
-        if newlines_in_values is not None:
-            self.newlines_in_values = newlines_in_values
-        if ignore_empty_lines is not None:
-            self.ignore_empty_lines = ignore_empty_lines
-
-    @property
-    def delimiter(self):
-        """
-        The character delimiting individual cells in the CSV data.
-        """
-        return chr(self.options.delimiter)
-
-    @delimiter.setter
-    def delimiter(self, value):
-        self.options.delimiter = _single_char(value)
-
-    @property
-    def quote_char(self):
-        """
-        The character used optionally for quoting CSV values
-        (False if quoting is not allowed).
-        """
-        if self.options.quoting:
-            return chr(self.options.quote_char)
-        else:
-            return False
-
-    @quote_char.setter
-    def quote_char(self, value):
-        if value is False:
-            self.options.quoting = False
-        else:
-            self.options.quote_char = _single_char(value)
-            self.options.quoting = True
-
-    @property
-    def double_quote(self):
-        """
-        Whether two quotes in a quoted CSV value denote a single quote
-        in the data.
-        """
-        return self.options.double_quote
-
-    @double_quote.setter
-    def double_quote(self, value):
-        self.options.double_quote = value
-
-    @property
-    def escape_char(self):
-        """
-        The character used optionally for escaping special characters
-        (False if escaping is not allowed).
-        """
-        if self.options.escaping:
-            return chr(self.options.escape_char)
-        else:
-            return False
-
-    @escape_char.setter
-    def escape_char(self, value):
-        if value is False:
-            self.options.escaping = False
-        else:
-            self.options.escape_char = _single_char(value)
-            self.options.escaping = True
-
-    @property
-    def newlines_in_values(self):
-        """
-        Whether newline characters are allowed in CSV values.
-        Setting this to True reduces the performance of multi-threaded
-        CSV reading.
-        """
-        return self.options.newlines_in_values
-
-    @newlines_in_values.setter
-    def newlines_in_values(self, value):
-        self.options.newlines_in_values = value
-
-    @property
-    def ignore_empty_lines(self):
-        """
-        Whether empty lines are ignored in CSV input.
-        If False, an empty line is interpreted as containing a single empty
-        value (assuming a one-column CSV file).
-        """
-        return self.options.ignore_empty_lines
-
-    @ignore_empty_lines.setter
-    def ignore_empty_lines(self, value):
-        self.options.ignore_empty_lines = value
-
-    def equals(self, ParseOptions other):
-        return (
-            self.delimiter == other.delimiter and
-            self.quote_char == other.quote_char and
-            self.double_quote == other.double_quote and
-            self.escape_char == other.escape_char and
-            self.newlines_in_values == other.newlines_in_values and
-            self.ignore_empty_lines == other.ignore_empty_lines
-        )
-
-    @staticmethod
-    cdef ParseOptions wrap(CCSVParseOptions options):
-        out = ParseOptions()
-        out.options = options
-        return out
-
-    def __getstate__(self):
-        return (self.delimiter, self.quote_char, self.double_quote,
-                self.escape_char, self.newlines_in_values,
-                self.ignore_empty_lines)
-
-    def __setstate__(self, state):
-        (self.delimiter, self.quote_char, self.double_quote,
-         self.escape_char, self.newlines_in_values,
-         self.ignore_empty_lines) = state
-
-    def __eq__(self, other):
-        try:
-            return self.equals(other)
-        except TypeError:
-            return False
-
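For illustration, ParseOptions for a semicolon-delimited file with backslash escapes (a sketch):

    from pyarrow import csv

    parse_opts = csv.ParseOptions(
        delimiter=";",
        escape_char="\\",          # enable escaping with backslash
        newlines_in_values=True,   # allow embedded newlines (slower multi-threaded reads)
    )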
-
-cdef class _ISO8601(_Weakrefable):
-    """
-    A special object indicating ISO-8601 parsing.
-    """
-    __slots__ = ()
-
-    def __str__(self):
-        return 'ISO8601'
-
-    def __eq__(self, other):
-        return isinstance(other, _ISO8601)
-
-
-ISO8601 = _ISO8601()
-
-
-cdef class ConvertOptions(_Weakrefable):
-    """
-    Options for converting CSV data.
-
-    Parameters
-    ----------
-    check_utf8 : bool, optional (default True)
-        Whether to check UTF8 validity of string columns.
-    column_types: pa.Schema or dict, optional
-        Explicitly map column names to column types. Passing this argument
-        disables type inference on the defined columns.
-    null_values: list, optional
-        A sequence of strings that denote nulls in the data
-        (defaults are appropriate in most cases). Note that by default,
-        string columns are not checked for null values. To enable
-        null checking for those, specify ``strings_can_be_null=True``.
-    true_values: list, optional
-        A sequence of strings that denote true booleans in the data
-        (defaults are appropriate in most cases).
-    false_values: list, optional
-        A sequence of strings that denote false booleans in the data
-        (defaults are appropriate in most cases).
-    timestamp_parsers: list, optional
-        A sequence of strptime()-compatible format strings, tried in order
-        when attempting to infer or convert timestamp values (the special
-        value ISO8601() can also be given).  By default, a fast built-in
-        ISO-8601 parser is used.
-    strings_can_be_null: bool, optional (default False)
-        Whether string / binary columns can have null values.
-        If true, then strings in null_values are considered null for
-        string columns.
-        If false, then all strings are valid string values.
-    auto_dict_encode: bool, optional (default False)
-        Whether to try to automatically dict-encode string / binary data.
-        If true, then when type inference detects a string or binary column,
-        it is dict-encoded up to `auto_dict_max_cardinality` distinct values
-        (per chunk), after which it switches to regular encoding.
-        This setting is ignored for non-inferred columns (those in
-        `column_types`).
-    auto_dict_max_cardinality: int, optional
-        The maximum dictionary cardinality for `auto_dict_encode`.
-        This value is per chunk.
-    include_columns: list, optional
-        The names of columns to include in the Table.
-        If empty, the Table will include all columns from the CSV file.
-        If not empty, only these columns will be included, in this order.
-    include_missing_columns: bool, optional (default False)
-        If false, columns in `include_columns` but not in the CSV file will
-        error out.
-        If true, columns in `include_columns` but not in the CSV file will
-        produce a column of nulls (whose type is selected using
-        `column_types`, or null by default).
-        This option is ignored if `include_columns` is empty.
-    """
-    # Avoid mistakenly creating attributes
-    __slots__ = ()
-
-    def __init__(self, *, check_utf8=None, column_types=None, null_values=None,
-                 true_values=None, false_values=None,
-                 strings_can_be_null=None, include_columns=None,
-                 include_missing_columns=None, auto_dict_encode=None,
-                 auto_dict_max_cardinality=None, timestamp_parsers=None):
-        self.options = CCSVConvertOptions.Defaults()
-        if check_utf8 is not None:
-            self.check_utf8 = check_utf8
-        if column_types is not None:
-            self.column_types = column_types
-        if null_values is not None:
-            self.null_values = null_values
-        if true_values is not None:
-            self.true_values = true_values
-        if false_values is not None:
-            self.false_values = false_values
-        if strings_can_be_null is not None:
-            self.strings_can_be_null = strings_can_be_null
-        if include_columns is not None:
-            self.include_columns = include_columns
-        if include_missing_columns is not None:
-            self.include_missing_columns = include_missing_columns
-        if auto_dict_encode is not None:
-            self.auto_dict_encode = auto_dict_encode
-        if auto_dict_max_cardinality is not None:
-            self.auto_dict_max_cardinality = auto_dict_max_cardinality
-        if timestamp_parsers is not None:
-            self.timestamp_parsers = timestamp_parsers
-
-    @property
-    def check_utf8(self):
-        """
-        Whether to check UTF8 validity of string columns.
-        """
-        return self.options.check_utf8
-
-    @check_utf8.setter
-    def check_utf8(self, value):
-        self.options.check_utf8 = value
-
-    @property
-    def strings_can_be_null(self):
-        """
-        Whether string / binary columns can have null values.
-        """
-        return self.options.strings_can_be_null
-
-    @strings_can_be_null.setter
-    def strings_can_be_null(self, value):
-        self.options.strings_can_be_null = value
-
-    @property
-    def column_types(self):
-        """
-        Explicitly map column names to column types.
-        """
-        d = {frombytes(item.first): pyarrow_wrap_data_type(item.second)
-             for item in self.options.column_types}
-        return d
-
-    @column_types.setter
-    def column_types(self, value):
-        cdef:
-            shared_ptr[CDataType] typ
-
-        if isinstance(value, Mapping):
-            value = value.items()
-
-        self.options.column_types.clear()
-        for item in value:
-            if isinstance(item, Field):
-                k = item.name
-                v = item.type
-            else:
-                k, v = item
-            typ = pyarrow_unwrap_data_type(ensure_type(v))
-            assert typ != NULL
-            self.options.column_types[tobytes(k)] = typ
-
-    @property
-    def null_values(self):
-        """
-        A sequence of strings that denote nulls in the data.
-        """
-        return [frombytes(x) for x in self.options.null_values]
-
-    @null_values.setter
-    def null_values(self, value):
-        self.options.null_values = [tobytes(x) for x in value]
-
-    @property
-    def true_values(self):
-        """
-        A sequence of strings that denote true booleans in the data.
-        """
-        return [frombytes(x) for x in self.options.true_values]
-
-    @true_values.setter
-    def true_values(self, value):
-        self.options.true_values = [tobytes(x) for x in value]
-
-    @property
-    def false_values(self):
-        """
-        A sequence of strings that denote false booleans in the data.
-        """
-        return [frombytes(x) for x in self.options.false_values]
-
-    @false_values.setter
-    def false_values(self, value):
-        self.options.false_values = [tobytes(x) for x in value]
-
-    @property
-    def auto_dict_encode(self):
-        """
-        Whether to try to automatically dict-encode string / binary data.
-        """
-        return self.options.auto_dict_encode
-
-    @auto_dict_encode.setter
-    def auto_dict_encode(self, value):
-        self.options.auto_dict_encode = value
-
-    @property
-    def auto_dict_max_cardinality(self):
-        """
-        The maximum dictionary cardinality for `auto_dict_encode`.
-
-        This value is per chunk.
-        """
-        return self.options.auto_dict_max_cardinality
-
-    @auto_dict_max_cardinality.setter
-    def auto_dict_max_cardinality(self, value):
-        self.options.auto_dict_max_cardinality = value
-
-    @property
-    def include_columns(self):
-        """
-        The names of columns to include in the Table.
-
-        If empty, the Table will include all columns from the CSV file.
-        If not empty, only these columns will be included, in this order.
-        """
-        return [frombytes(s) for s in self.options.include_columns]
-
-    @include_columns.setter
-    def include_columns(self, value):
-        self.options.include_columns.clear()
-        for item in value:
-            self.options.include_columns.push_back(tobytes(item))
-
-    @property
-    def include_missing_columns(self):
-        """
-        If false, columns in `include_columns` but not in the CSV file will
-        error out.
-        If true, columns in `include_columns` but not in the CSV file will
-        produce a null column (whose type is selected using `column_types`,
-        or null by default).
-        This option is ignored if `include_columns` is empty.
-        """
-        return self.options.include_missing_columns
-
-    @include_missing_columns.setter
-    def include_missing_columns(self, value):
-        self.options.include_missing_columns = value
-
-    @property
-    def timestamp_parsers(self):
-        """
-        A sequence of strptime()-compatible format strings, tried in order
-        when attempting to infer or convert timestamp values (the special
-        value ISO8601() can also be given).  By default, a fast built-in
-        ISO-8601 parser is used.
-        """
-        cdef:
-            shared_ptr[CTimestampParser] c_parser
-            c_string kind
-
-        parsers = []
-        for c_parser in self.options.timestamp_parsers:
-            kind = deref(c_parser).kind()
-            if kind == b'strptime':
-                parsers.append(frombytes(deref(c_parser).format()))
-            else:
-                assert kind == b'iso8601'
-                parsers.append(ISO8601)
-
-        return parsers
-
-    @timestamp_parsers.setter
-    def timestamp_parsers(self, value):
-        cdef:
-            vector[shared_ptr[CTimestampParser]] c_parsers
-
-        for v in value:
-            if isinstance(v, str):
-                c_parsers.push_back(CTimestampParser.MakeStrptime(tobytes(v)))
-            elif v == ISO8601:
-                c_parsers.push_back(CTimestampParser.MakeISO8601())
-            else:
-                raise TypeError("Expected list of str or ISO8601 objects")
-
-        self.options.timestamp_parsers = move(c_parsers)
-
-    @staticmethod
-    cdef ConvertOptions wrap(CCSVConvertOptions options):
-        out = ConvertOptions()
-        out.options = options
-        return out
-
-    def equals(self, ConvertOptions other):
-        return (
-            self.check_utf8 == other.check_utf8 and
-            self.column_types == other.column_types and
-            self.null_values == other.null_values and
-            self.true_values == other.true_values and
-            self.false_values == other.false_values and
-            self.timestamp_parsers == other.timestamp_parsers and
-            self.strings_can_be_null == other.strings_can_be_null and
-            self.auto_dict_encode == other.auto_dict_encode and
-            self.auto_dict_max_cardinality ==
-            other.auto_dict_max_cardinality and
-            self.include_columns == other.include_columns and
-            self.include_missing_columns == other.include_missing_columns
-        )
-
-    def __getstate__(self):
-        return (self.check_utf8, self.column_types, self.null_values,
-                self.true_values, self.false_values, self.timestamp_parsers,
-                self.strings_can_be_null, self.auto_dict_encode,
-                self.auto_dict_max_cardinality, self.include_columns,
-                self.include_missing_columns)
-
-    def __setstate__(self, state):
-        (self.check_utf8, self.column_types, self.null_values,
-         self.true_values, self.false_values, self.timestamp_parsers,
-         self.strings_can_be_null, self.auto_dict_encode,
-         self.auto_dict_max_cardinality, self.include_columns,
-         self.include_missing_columns) = state
-
-    def __eq__(self, other):
-        try:
-            return self.equals(other)
-        except TypeError:
-            return False
-
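A sketch of ConvertOptions pinning column types and widening the set of null markers (the column names and types are hypothetical):

    import pyarrow as pa
    from pyarrow import csv

    convert_opts = csv.ConvertOptions(
        column_types={"id": pa.int64(), "price": pa.float64()},
        null_values=["", "NA", "n/a"],
        strings_can_be_null=True,   # treat the markers above as null in string columns too
    )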
-
-cdef _get_reader(input_file, ReadOptions read_options,
-                 shared_ptr[CInputStream]* out):
-    use_memory_map = False
-    get_input_stream(input_file, use_memory_map, out)
-    if read_options is not None:
-        out[0] = native_transcoding_input_stream(out[0],
-                                                 read_options.encoding,
-                                                 'utf8')
-
-
-cdef _get_read_options(ReadOptions read_options, CCSVReadOptions* out):
-    if read_options is None:
-        out[0] = CCSVReadOptions.Defaults()
-    else:
-        out[0] = read_options.options
-
-
-cdef _get_parse_options(ParseOptions parse_options, CCSVParseOptions* out):
-    if parse_options is None:
-        out[0] = CCSVParseOptions.Defaults()
-    else:
-        out[0] = parse_options.options
-
-
-cdef _get_convert_options(ConvertOptions convert_options,
-                          CCSVConvertOptions* out):
-    if convert_options is None:
-        out[0] = CCSVConvertOptions.Defaults()
-    else:
-        out[0] = convert_options.options
-
-
-cdef class CSVStreamingReader(RecordBatchReader):
-    """An object that reads record batches incrementally from a CSV file.
-
-    Should not be instantiated directly by user code.
-    """
-    cdef readonly:
-        Schema schema
-
-    def __init__(self):
-        raise TypeError("Do not call {}'s constructor directly, "
-                        "use pyarrow.csv.open_csv() instead."
-                        .format(self.__class__.__name__))
-
-    # Note about cancellation: we cannot create a SignalStopHandler
-    # by default here, as several CSVStreamingReader instances may be
-    # created (including by the same thread).  Handling cancellation
-    # would require having the user pass the SignalStopHandler.
-    # (in addition to solving ARROW-11853)
-
-    cdef _open(self, shared_ptr[CInputStream] stream,
-               CCSVReadOptions c_read_options,
-               CCSVParseOptions c_parse_options,
-               CCSVConvertOptions c_convert_options,
-               MemoryPool memory_pool):
-        cdef:
-            shared_ptr[CSchema] c_schema
-            CIOContext io_context
-
-        io_context = CIOContext(maybe_unbox_memory_pool(memory_pool))
-
-        with nogil:
-            self.reader = <shared_ptr[CRecordBatchReader]> GetResultValue(
-                CCSVStreamingReader.Make(
-                    io_context, stream,
-                    move(c_read_options), move(c_parse_options),
-                    move(c_convert_options)))
-            c_schema = self.reader.get().schema()
-
-        self.schema = pyarrow_wrap_schema(c_schema)
-
-
-def read_csv(input_file, read_options=None, parse_options=None,
-             convert_options=None, MemoryPool memory_pool=None):
-    """
-    Read a Table from a stream of CSV data.
-
-    Parameters
-    ----------
-    input_file: string, path or file-like object
-        The location of CSV data.  If a string or path, and if it ends
-        with a recognized compressed file extension (e.g. ".gz" or ".bz2"),
-        the data is automatically decompressed when reading.
-    read_options: pyarrow.csv.ReadOptions, optional
-        Options for the CSV reader (see pyarrow.csv.ReadOptions constructor
-        for defaults)
-    parse_options: pyarrow.csv.ParseOptions, optional
-        Options for the CSV parser
-        (see pyarrow.csv.ParseOptions constructor for defaults)
-    convert_options: pyarrow.csv.ConvertOptions, optional
-        Options for converting CSV data
-        (see pyarrow.csv.ConvertOptions constructor for defaults)
-    memory_pool: MemoryPool, optional
-        Pool to allocate Table memory from
-
-    Returns
-    -------
-    :class:`pyarrow.Table`
-        Contents of the CSV file as an in-memory table.
-    """
-    cdef:
-        shared_ptr[CInputStream] stream
-        CCSVReadOptions c_read_options
-        CCSVParseOptions c_parse_options
-        CCSVConvertOptions c_convert_options
-        CIOContext io_context
-        shared_ptr[CCSVReader] reader
-        shared_ptr[CTable] table
-
-    _get_reader(input_file, read_options, &stream)
-    _get_read_options(read_options, &c_read_options)
-    _get_parse_options(parse_options, &c_parse_options)
-    _get_convert_options(convert_options, &c_convert_options)
-
-    with SignalStopHandler() as stop_handler:
-        io_context = CIOContext(
-            maybe_unbox_memory_pool(memory_pool),
-            (<StopToken> stop_handler.stop_token).stop_token)
-        reader = GetResultValue(CCSVReader.Make(
-            io_context, stream,
-            c_read_options, c_parse_options, c_convert_options))
-
-        with nogil:
-            table = GetResultValue(reader.get().Read())
-
-    return pyarrow_wrap_table(table)
-
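Putting the option classes together with read_csv (a sketch; "data.csv.gz" is a hypothetical path, decompressed automatically based on its extension):

    from pyarrow import csv

    table = csv.read_csv(
        "data.csv.gz",
        read_options=csv.ReadOptions(skip_rows=1),
        parse_options=csv.ParseOptions(delimiter=";"),
    )
    print(table.num_rows, table.schema)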
-
-def open_csv(input_file, read_options=None, parse_options=None,
-             convert_options=None, MemoryPool memory_pool=None):
-    """
-    Open a streaming reader of CSV data.
-
-    Reading using this function is always single-threaded.
-
-    Parameters
-    ----------
-    input_file: string, path or file-like object
-        The location of CSV data.  If a string or path, and if it ends
-        with a recognized compressed file extension (e.g. ".gz" or ".bz2"),
-        the data is automatically decompressed when reading.
-    read_options: pyarrow.csv.ReadOptions, optional
-        Options for the CSV reader (see pyarrow.csv.ReadOptions constructor
-        for defaults)
-    parse_options: pyarrow.csv.ParseOptions, optional
-        Options for the CSV parser
-        (see pyarrow.csv.ParseOptions constructor for defaults)
-    convert_options: pyarrow.csv.ConvertOptions, optional
-        Options for converting CSV data
-        (see pyarrow.csv.ConvertOptions constructor for defaults)
-    memory_pool: MemoryPool, optional
-        Pool to allocate Table memory from
-
-    Returns
-    -------
-    :class:`pyarrow.csv.CSVStreamingReader`
-    """
-    cdef:
-        shared_ptr[CInputStream] stream
-        CCSVReadOptions c_read_options
-        CCSVParseOptions c_parse_options
-        CCSVConvertOptions c_convert_options
-        CSVStreamingReader reader
-
-    _get_reader(input_file, read_options, &stream)
-    _get_read_options(read_options, &c_read_options)
-    _get_parse_options(parse_options, &c_parse_options)
-    _get_convert_options(convert_options, &c_convert_options)
-
-    reader = CSVStreamingReader.__new__(CSVStreamingReader)
-    reader._open(stream, move(c_read_options), move(c_parse_options),
-                 move(c_convert_options), memory_pool)
-    return reader
-
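For comparison with read_csv, a minimal sketch of driving the streaming reader returned by open_csv; it assumes the reader is consumed by iterating over record batches (RecordBatchReader iteration) and the file name is hypothetical.

    import pyarrow.csv as csv

    # Process a large CSV incrementally instead of materializing a Table.
    reader = csv.open_csv("large.csv")          # hypothetical path
    total_rows = 0
    for batch in reader:                        # each item is a RecordBatch
        total_rows += batch.num_rows
    print(reader.schema, total_rows)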
-
-cdef class WriteOptions(_Weakrefable):
-    """
-    Options for writing CSV files.
-
-    Parameters
-    ----------
-    include_header : bool, optional (default True)
-        Whether to write an initial header line with column names
-    batch_size : int, optional (default 1024)
-        How many rows to process together when converting and writing
-        CSV data
-    """
-    cdef:
-        CCSVWriteOptions options
-
-    # Avoid mistakenly creating attributes
-    __slots__ = ()
-
-    def __init__(self, *, include_header=None, batch_size=None):
-        self.options = CCSVWriteOptions.Defaults()
-        if include_header is not None:
-            self.include_header = include_header
-        if batch_size is not None:
-            self.batch_size = batch_size
-
-    @property
-    def include_header(self):
-        """
-        Whether to write an initial header line with column names.
-        """
-        return self.options.include_header
-
-    @include_header.setter
-    def include_header(self, value):
-        self.options.include_header = value
-
-    @property
-    def batch_size(self):
-        """
-        How many rows to process together when converting and writing
-        CSV data.
-        """
-        return self.options.batch_size
-
-    @batch_size.setter
-    def batch_size(self, value):
-        self.options.batch_size = value
-
-
-cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
-    if write_options is None:
-        out[0] = CCSVWriteOptions.Defaults()
-    else:
-        out[0] = write_options.options
-
-
-def write_csv(data, output_file, write_options=None,
-              MemoryPool memory_pool=None):
-    """
-    Write record batch or table to a CSV file.
-
-    Parameters
-    ----------
-    data: pyarrow.RecordBatch or pyarrow.Table
-        The data to write.
-    output_file: string, path, pyarrow.OutputStream or file-like object
-        The location to write the CSV data to.
-    write_options: pyarrow.csv.WriteOptions
-        Options to configure writing the CSV data.
-    memory_pool: MemoryPool, optional
-        Pool for temporary allocations.
-    """
-    cdef:
-        shared_ptr[COutputStream] stream
-        CCSVWriteOptions c_write_options
-        CMemoryPool* c_memory_pool
-        CRecordBatch* batch
-        CTable* table
-    _get_write_options(write_options, &c_write_options)
-
-    get_writer(output_file, &stream)
-    c_memory_pool = maybe_unbox_memory_pool(memory_pool)
-    if isinstance(data, RecordBatch):
-        batch = pyarrow_unwrap_batch(data).get()
-        with nogil:
-            check_status(WriteCSV(deref(batch), c_write_options, c_memory_pool,
-                                  stream.get()))
-    elif isinstance(data, Table):
-        table = pyarrow_unwrap_table(data).get()
-        with nogil:
-            check_status(WriteCSV(deref(table), c_write_options, c_memory_pool,
-                                  stream.get()))
-    else:
-        raise TypeError(f"Expected Table or RecordBatch, got '{type(data)}'")
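A minimal sketch of the corresponding write path, rounding a small in-memory table out through write_csv; the output path "out.csv" is an assumption.

    import pyarrow as pa
    import pyarrow.csv as csv

    table = pa.table({"a": [1, 2, 3], "b": ["x", "y", "z"]})
    csv.write_csv(
        table, "out.csv",                        # hypothetical output path
        write_options=csv.WriteOptions(include_header=True, batch_size=1024),
    )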
diff --git a/python/pyarrow/_cuda.pxd b/python/pyarrow/_cuda.pxd
deleted file mode 100644
index 6acb882..0000000
--- a/python/pyarrow/_cuda.pxd
+++ /dev/null
@@ -1,67 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: language_level = 3
-
-from pyarrow.lib cimport *
-from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport *
-from pyarrow.includes.libarrow_cuda cimport *
-
-
-cdef class Context(_Weakrefable):
-    cdef:
-        shared_ptr[CCudaContext] context
-        int device_number
-
-    cdef void init(self, const shared_ptr[CCudaContext]& ctx)
-
-
-cdef class IpcMemHandle(_Weakrefable):
-    cdef:
-        shared_ptr[CCudaIpcMemHandle] handle
-
-    cdef void init(self, shared_ptr[CCudaIpcMemHandle]& h)
-
-
-cdef class CudaBuffer(Buffer):
-    cdef:
-        shared_ptr[CCudaBuffer] cuda_buffer
-        object base
-
-    cdef void init_cuda(self,
-                        const shared_ptr[CCudaBuffer]& buffer,
-                        object base)
-
-
-cdef class HostBuffer(Buffer):
-    cdef:
-        shared_ptr[CCudaHostBuffer] host_buffer
-
-    cdef void init_host(self, const shared_ptr[CCudaHostBuffer]& buffer)
-
-
-cdef class BufferReader(NativeFile):
-    cdef:
-        CCudaBufferReader* reader
-        CudaBuffer buffer
-
-
-cdef class BufferWriter(NativeFile):
-    cdef:
-        CCudaBufferWriter* writer
-        CudaBuffer buffer
diff --git a/python/pyarrow/_cuda.pyx b/python/pyarrow/_cuda.pyx
deleted file mode 100644
index f4ca763..0000000
--- a/python/pyarrow/_cuda.pyx
+++ /dev/null
@@ -1,1059 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-from pyarrow.lib import tobytes
-from pyarrow.lib cimport *
-from pyarrow.includes.libarrow_cuda cimport *
-from pyarrow.lib import py_buffer, allocate_buffer, as_buffer, ArrowTypeError
-from pyarrow.util import get_contiguous_span
-cimport cpython as cp
-
-
-cdef class Context(_Weakrefable):
-    """
-    CUDA driver context.
-    """
-
-    def __init__(self, *args, **kwargs):
-        """
-        Create a CUDA driver context for a particular device.
-
-        If a CUDA context handle is passed, it is wrapped, otherwise
-        a default CUDA context for the given device is requested.
-
-        Parameters
-        ----------
-        device_number : int (default 0)
-          Specify the GPU device for which the CUDA driver context is
-          requested.
-        handle : int, optional
-          Specify CUDA handle for a shared context that has been created
-          by another library.
-        """
-        # This method is exposed because autodoc doesn't pick up __cinit__
-
-    def __cinit__(self, int device_number=0, uintptr_t handle=0):
-        cdef CCudaDeviceManager* manager
-        manager = GetResultValue(CCudaDeviceManager.Instance())
-        cdef int n = manager.num_devices()
-        if device_number >= n or device_number < 0:
-            self.context.reset()
-            raise ValueError('device_number argument must be '
-                             'non-negative and less than %s' % (n))
-        if handle == 0:
-            self.context = GetResultValue(manager.GetContext(device_number))
-        else:
-            self.context = GetResultValue(manager.GetSharedContext(
-                device_number, <void*>handle))
-        self.device_number = device_number
-
-    @staticmethod
-    def from_numba(context=None):
-        """
-        Create a Context instance from a Numba CUDA context.
-
-        Parameters
-        ----------
-        context : {numba.cuda.cudadrv.driver.Context, None}
-          A Numba CUDA context instance.
-          If None, the current Numba context is used.
-
-        Returns
-        -------
-        shared_context : pyarrow.cuda.Context
-          Context instance.
-        """
-        if context is None:
-            import numba.cuda
-            context = numba.cuda.current_context()
-        return Context(device_number=context.device.id,
-                       handle=context.handle.value)
-
-    def to_numba(self):
-        """
-        Convert Context to a Numba CUDA context.
-
-        Returns
-        -------
-        context : numba.cuda.cudadrv.driver.Context
-          Numba CUDA context instance.
-        """
-        import ctypes
-        import numba.cuda
-        device = numba.cuda.gpus[self.device_number]
-        handle = ctypes.c_void_p(self.handle)
-        context = numba.cuda.cudadrv.driver.Context(device, handle)
-
-        class DummyPendingDeallocs(object):
-            # Context is managed by pyarrow
-            def add_item(self, *args, **kwargs):
-                pass
-
-        context.deallocations = DummyPendingDeallocs()
-        return context
-
-    @staticmethod
-    def get_num_devices():
-        """ Return the number of GPU devices.
-        """
-        cdef CCudaDeviceManager* manager
-        manager = GetResultValue(CCudaDeviceManager.Instance())
-        return manager.num_devices()
-
-    @property
-    def device_number(self):
-        """ Return context device number.
-        """
-        return self.device_number
-
-    @property
-    def handle(self):
-        """ Return pointer to context handle.
-        """
-        return <uintptr_t>self.context.get().handle()
-
-    cdef void init(self, const shared_ptr[CCudaContext]& ctx):
-        self.context = ctx
-
-    def synchronize(self):
-        """Blocks until the device has completed all preceding requested
-        tasks.
-        """
-        check_status(self.context.get().Synchronize())
-
-    @property
-    def bytes_allocated(self):
-        """Return the number of allocated bytes.
-        """
-        return self.context.get().bytes_allocated()
-
-    def get_device_address(self, uintptr_t address):
-        """Return the device address that is reachable from kernels running in
-        the context
-
-        Parameters
-        ----------
-        address : int
-          Specify memory address value
-
-        Returns
-        -------
-        device_address : int
-          Device address accessible from device context
-
-        Notes
-        -----
-        The device address is defined as a memory address accessible
-        by the device. It is often a device memory address, but it
-        can also be a host memory address, for instance when the
-        memory is allocated as host memory (using cudaMallocHost or
-        cudaHostAlloc), as managed memory (using cudaMallocManaged),
-        or when the host memory is page-locked (using cudaHostRegister).
-        """
-        return GetResultValue(self.context.get().GetDeviceAddress(address))
-
-    def new_buffer(self, int64_t nbytes):
-        """Return new device buffer.
-
-        Parameters
-        ----------
-        nbytes : int
-          Specify the number of bytes to be allocated.
-
-        Returns
-        -------
-        buf : CudaBuffer
-          Allocated buffer.
-        """
-        cdef:
-            shared_ptr[CCudaBuffer] cudabuf
-        with nogil:
-            cudabuf = GetResultValue(self.context.get().Allocate(nbytes))
-        return pyarrow_wrap_cudabuffer(cudabuf)
-
-    def foreign_buffer(self, address, size, base=None):
-        """Create device buffer from address and size as a view.
-
-        The caller is responsible for allocating and freeing the
-        memory. When `address==size==0` then a new zero-sized buffer
-        is returned.
-
-        Parameters
-        ----------
-        address : int
-          Specify the starting address of the buffer. The address can
-          refer to both device or host memory but it must be
-          accessible from device after mapping it with
-          `get_device_address` method.
-        size : int
-          Specify the size of device buffer in bytes.
-        base : {None, object}
-          Specify object that owns the referenced memory.
-
-        Returns
-        -------
-        cbuf : CudaBuffer
-          Device buffer as a view of device reachable memory.
-
-        """
-        if not address and size == 0:
-            return self.new_buffer(0)
-        cdef:
-            uintptr_t c_addr = self.get_device_address(address)
-            int64_t c_size = size
-            shared_ptr[CCudaBuffer] cudabuf
-
-        cudabuf = GetResultValue(self.context.get().View(
-            <uint8_t*>c_addr, c_size))
-        return pyarrow_wrap_cudabuffer_base(cudabuf, base)
-
-    def open_ipc_buffer(self, ipc_handle):
-        """ Open existing CUDA IPC memory handle
-
-        Parameters
-        ----------
-        ipc_handle : IpcMemHandle
-          Specify opaque pointer to CUipcMemHandle (driver API).
-
-        Returns
-        -------
-        buf : CudaBuffer
-          Device buffer referencing the shared device memory.
-        """
-        handle = pyarrow_unwrap_cudaipcmemhandle(ipc_handle)
-        cdef shared_ptr[CCudaBuffer] cudabuf
-        with nogil:
-            cudabuf = GetResultValue(
-                self.context.get().OpenIpcBuffer(handle.get()[0]))
-        return pyarrow_wrap_cudabuffer(cudabuf)
-
-    def buffer_from_data(self, object data, int64_t offset=0, int64_t size=-1):
-        """Create device buffer and initialize with data.
-
-        Parameters
-        ----------
-        data : {CudaBuffer, HostBuffer, Buffer, array-like}
-          Specify data to be copied to device buffer.
-        offset : int
-          Specify the offset of input buffer for device data
-          buffering. Default: 0.
-        size : int
-          Specify the size of device buffer in bytes. Default: all
-          (starting from input offset)
-
-        Returns
-        -------
-        cbuf : CudaBuffer
-          Device buffer with copied data.
-        """
-        is_host_data = not pyarrow_is_cudabuffer(data)
-        buf = as_buffer(data) if is_host_data else data
-
-        bsize = buf.size
-        if offset < 0 or (bsize and offset >= bsize):
-            raise ValueError('offset argument is out-of-range')
-        if size < 0:
-            size = bsize - offset
-        elif offset + size > bsize:
-            raise ValueError(
-                'requested larger slice than available in device buffer')
-
-        if offset != 0 or size != bsize:
-            buf = buf.slice(offset, size)
-
-        result = self.new_buffer(size)
-        if is_host_data:
-            result.copy_from_host(buf, position=0, nbytes=size)
-        else:
-            result.copy_from_device(buf, position=0, nbytes=size)
-        return result
-
-    def buffer_from_object(self, obj):
-        """Create device buffer view of arbitrary object that references
-        device accessible memory.
-
-        When the object contains a non-contiguous view of device
-        accessible memory, the returned device buffer will contain a
-        contiguous view of that memory, that is, including the
-        intermediate data that is otherwise invisible to the input
-        object.
-
-        Parameters
-        ----------
-        obj : {object, Buffer, HostBuffer, CudaBuffer, ...}
-          Specify an object that holds (device or host) address that
-          can be accessed from device. This includes objects with
-          types defined in pyarrow.cuda as well as arbitrary objects
-          that implement the CUDA array interface as defined by numba.
-
-        Returns
-        -------
-        cbuf : CudaBuffer
-          Device buffer as a view of device accessible memory.
-
-        """
-        if isinstance(obj, HostBuffer):
-            return self.foreign_buffer(obj.address, obj.size, base=obj)
-        elif isinstance(obj, Buffer):
-            return CudaBuffer.from_buffer(obj)
-        elif isinstance(obj, CudaBuffer):
-            return obj
-        elif hasattr(obj, '__cuda_array_interface__'):
-            desc = obj.__cuda_array_interface__
-            addr = desc['data'][0]
-            if addr is None:
-                return self.new_buffer(0)
-            import numpy as np
-            start, end = get_contiguous_span(
-                desc['shape'], desc.get('strides'),
-                np.dtype(desc['typestr']).itemsize)
-            return self.foreign_buffer(addr + start, end - start, base=obj)
-        raise ArrowTypeError('cannot create device buffer view from'
-                             ' `%s` object' % (type(obj)))
-
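A minimal sketch of allocating device memory through the Context class above, assuming a CUDA-enabled pyarrow build and at least one visible GPU.

    import numpy as np
    import pyarrow.cuda as cuda

    ctx = cuda.Context(0)                        # default GPU device
    host_data = np.arange(1024, dtype=np.uint8)
    dbuf = ctx.buffer_from_data(host_data)       # allocate + copy to device
    print(dbuf.size, ctx.bytes_allocated)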
-
-cdef class IpcMemHandle(_Weakrefable):
-    """A serializable container for a CUDA IPC handle.
-    """
-    cdef void init(self, shared_ptr[CCudaIpcMemHandle]& h):
-        self.handle = h
-
-    @staticmethod
-    def from_buffer(Buffer opaque_handle):
-        """Create IpcMemHandle from opaque buffer (e.g. from another
-        process)
-
-        Parameters
-        ----------
-        opaque_handle : Buffer
-          Buffer wrapping a CUipcMemHandle (driver API)
-
-        Returns
-        -------
-        ipc_handle : IpcMemHandle
-        """
-        c_buf = pyarrow_unwrap_buffer(opaque_handle)
-        cdef:
-            shared_ptr[CCudaIpcMemHandle] handle
-
-        handle = GetResultValue(
-            CCudaIpcMemHandle.FromBuffer(c_buf.get().data()))
-        return pyarrow_wrap_cudaipcmemhandle(handle)
-
-    def serialize(self, pool=None):
-        """Write IpcMemHandle to a Buffer
-
-        Parameters
-        ----------
-        pool : {MemoryPool, None}
-          Specify a pool to allocate memory from
-
-        Returns
-        -------
-        buf : Buffer
-          The serialized buffer.
-        """
-        cdef CMemoryPool* pool_ = maybe_unbox_memory_pool(pool)
-        cdef shared_ptr[CBuffer] buf
-        cdef CCudaIpcMemHandle* h = self.handle.get()
-        with nogil:
-            buf = GetResultValue(h.Serialize(pool_))
-        return pyarrow_wrap_buffer(buf)
-
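A hedged sketch of the IPC handshake built from IpcMemHandle together with CudaBuffer.export_for_ipc defined further below; in a real program the serialized handle would travel to another process over a pipe or socket, and a CUDA-enabled build is assumed.

    import pyarrow as pa
    import pyarrow.cuda as cuda

    # Exporting process
    ctx = cuda.Context(0)
    dbuf = ctx.new_buffer(64)
    payload = dbuf.export_for_ipc().serialize().to_pybytes()

    # Importing process (payload received out of band)
    handle = cuda.IpcMemHandle.from_buffer(pa.py_buffer(payload))
    shared = cuda.Context(0).open_ipc_buffer(handle)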
-
-cdef class CudaBuffer(Buffer):
-    """An Arrow buffer with data located in a GPU device.
-
-    To create a CudaBuffer instance, use Context.new_buffer() or
-    Context.buffer_from_data().
-
-    The memory allocated in a CudaBuffer is freed when the buffer object
-    is deleted.
-    """
-
-    def __init__(self):
-        raise TypeError("Do not call CudaBuffer's constructor directly, use "
-                        "`<pyarrow.Context instance>.device_buffer`"
-                        " method instead.")
-
-    cdef void init_cuda(self,
-                        const shared_ptr[CCudaBuffer]& buffer,
-                        object base):
-        self.cuda_buffer = buffer
-        self.init(<shared_ptr[CBuffer]> buffer)
-        self.base = base
-
-    @staticmethod
-    def from_buffer(buf):
-        """ Convert back generic buffer into CudaBuffer
-
-        Parameters
-        ----------
-        buf : Buffer
-          Specify buffer containing CudaBuffer
-
-        Returns
-        -------
-        dbuf : CudaBuffer
-          Resulting device buffer.
-        """
-        c_buf = pyarrow_unwrap_buffer(buf)
-        cuda_buffer = GetResultValue(CCudaBuffer.FromBuffer(c_buf))
-        return pyarrow_wrap_cudabuffer(cuda_buffer)
-
-    @staticmethod
-    def from_numba(mem):
-        """Create a CudaBuffer view from numba MemoryPointer instance.
-
-        Parameters
-        ----------
-        mem :  numba.cuda.cudadrv.driver.MemoryPointer
-
-        Returns
-        -------
-        cbuf : CudaBuffer
-          Device buffer as a view of numba MemoryPointer.
-        """
-        ctx = Context.from_numba(mem.context)
-        if mem.device_pointer.value is None and mem.size == 0:
-            return ctx.new_buffer(0)
-        return ctx.foreign_buffer(mem.device_pointer.value, mem.size, base=mem)
-
-    def to_numba(self):
-        """Return numba memory pointer of CudaBuffer instance.
-        """
-        import ctypes
-        from numba.cuda.cudadrv.driver import MemoryPointer
-        return MemoryPointer(self.context.to_numba(),
-                             pointer=ctypes.c_void_p(self.address),
-                             size=self.size)
-
-    cdef getitem(self, int64_t i):
-        return self.copy_to_host(position=i, nbytes=1)[0]
-
-    def copy_to_host(self, int64_t position=0, int64_t nbytes=-1,
-                     Buffer buf=None,
-                     MemoryPool memory_pool=None, c_bool resizable=False):
-        """Copy memory from GPU device to CPU host
-
-        Caller is responsible for ensuring that all tasks affecting
-        the memory are finished. Use
-
-          `<CudaBuffer instance>.context.synchronize()`
-
-        when needed.
-
-        Parameters
-        ----------
-        position : int
-          Specify the starting position of the source data in GPU
-          device buffer. Default: 0.
-        nbytes : int
-          Specify the number of bytes to copy. Default: -1 (all from
-          the position until host buffer is full).
-        buf : Buffer
-          Specify a pre-allocated output buffer in host. Default: None
-          (allocate new output buffer).
-        memory_pool : MemoryPool
-        resizable : bool
-          Specify extra arguments to allocate_buffer. Used only when
-          buf is None.
-
-        Returns
-        -------
-        buf : Buffer
-          Output buffer in host.
-
-        """
-        if position < 0 or (self.size and position > self.size) \
-           or (self.size == 0 and position != 0):
-            raise ValueError('position argument is out-of-range')
-        cdef:
-            int64_t c_nbytes
-        if buf is None:
-            if nbytes < 0:
-                # copy all starting from position to new host buffer
-                c_nbytes = self.size - position
-            else:
-                if nbytes > self.size - position:
-                    raise ValueError(
-                        'requested more to copy than available from '
-                        'device buffer')
-                # copy nbytes starting from position to new host buffer
-                c_nbytes = nbytes
-            buf = allocate_buffer(c_nbytes, memory_pool=memory_pool,
-                                  resizable=resizable)
-        else:
-            if nbytes < 0:
-                # copy all from position until given host buffer is full
-                c_nbytes = min(self.size - position, buf.size)
-            else:
-                if nbytes > buf.size:
-                    raise ValueError(
-                        'requested copy does not fit into host buffer')
-                # copy nbytes from position to given host buffer
-                c_nbytes = nbytes
-
-        cdef:
-            shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(buf)
-            int64_t c_position = position
-        with nogil:
-            check_status(self.cuda_buffer.get()
-                         .CopyToHost(c_position, c_nbytes,
-                                     c_buf.get().mutable_data()))
-        return buf
-
-    def copy_from_host(self, data, int64_t position=0, int64_t nbytes=-1):
-        """Copy data from host to device.
-
-        The device buffer must be pre-allocated.
-
-        Parameters
-        ----------
-        data : {Buffer, array-like}
-          Specify data in host. It can be array-like that is valid
-          argument to py_buffer
-        position : int
-          Specify the starting position of the copy in device buffer.
-          Default: 0.
-        nbytes : int
-          Specify the number of bytes to copy. Default: -1 (all from
-          source until device buffer, starting from position, is full)
-
-        Returns
-        -------
-        nbytes : int
-          Number of bytes copied.
-        """
-        if position < 0 or position > self.size:
-            raise ValueError('position argument is out-of-range')
-        cdef:
-            int64_t c_nbytes
-        buf = as_buffer(data)
-
-        if nbytes < 0:
-            # copy from host buffer to device buffer starting from
-            # position until device buffer is full
-            c_nbytes = min(self.size - position, buf.size)
-        else:
-            if nbytes > buf.size:
-                raise ValueError(
-                    'requested more to copy than available from host buffer')
-            if nbytes > self.size - position:
-                raise ValueError(
-                    'requested more to copy than available in device buffer')
-            # copy nbytes from host buffer to device buffer starting
-            # from position
-            c_nbytes = nbytes
-
-        cdef:
-            shared_ptr[CBuffer] c_buf = pyarrow_unwrap_buffer(buf)
-            int64_t c_position = position
-        with nogil:
-            check_status(self.cuda_buffer.get().
-                         CopyFromHost(c_position, c_buf.get().data(),
-                                      c_nbytes))
-        return c_nbytes
-
-    def copy_from_device(self, buf, int64_t position=0, int64_t nbytes=-1):
-        """Copy data from device to device.
-
-        Parameters
-        ----------
-        buf : CudaBuffer
-          Specify source device buffer.
-        position : int
-          Specify the starting position of the copy in device buffer.
-          Default: 0.
-        nbytes : int
-          Specify the number of bytes to copy. Default: -1 (all from
-          source until device buffer, starting from position, is full)
-
-        Returns
-        -------
-        nbytes : int
-          Number of bytes copied.
-
-        """
-        if position < 0 or position > self.size:
-            raise ValueError('position argument is out-of-range')
-        cdef:
-            int64_t c_nbytes
-
-        if nbytes < 0:
-            # copy from source device buffer to device buffer starting
-            # from position until device buffer is full
-            c_nbytes = min(self.size - position, buf.size)
-        else:
-            if nbytes > buf.size:
-                raise ValueError(
-                    'requested more to copy than available from device buffer')
-            if nbytes > self.size - position:
-                raise ValueError(
-                    'requested more to copy than available in device buffer')
-            # copy nbytes from source device buffer to device buffer
-            # starting from position
-            c_nbytes = nbytes
-
-        cdef:
-            shared_ptr[CCudaBuffer] c_buf = pyarrow_unwrap_cudabuffer(buf)
-            int64_t c_position = position
-            shared_ptr[CCudaContext] c_src_ctx = pyarrow_unwrap_cudacontext(
-                buf.context)
-            void* c_source_data = <void*>(c_buf.get().address())
-
-        if self.context.handle != buf.context.handle:
-            with nogil:
-                check_status(self.cuda_buffer.get().
-                             CopyFromAnotherDevice(c_src_ctx, c_position,
-                                                   c_source_data, c_nbytes))
-        else:
-            with nogil:
-                check_status(self.cuda_buffer.get().
-                             CopyFromDevice(c_position, c_source_data,
-                                            c_nbytes))
-        return c_nbytes
-
-    def export_for_ipc(self):
-        """
-        Expose this device buffer as IPC memory which can be used in other
-        processes.
-
-        After calling this function, this device memory will not be
-        freed when the CudaBuffer is destructed.
-
-        Returns
-        -------
-        ipc_handle : IpcMemHandle
-          The exported IPC handle
-
-        """
-        cdef shared_ptr[CCudaIpcMemHandle] handle
-        with nogil:
-            handle = GetResultValue(self.cuda_buffer.get().ExportForIpc())
-        return pyarrow_wrap_cudaipcmemhandle(handle)
-
-    @property
-    def context(self):
-        """Returns the CUDA driver context of this buffer.
-        """
-        return pyarrow_wrap_cudacontext(self.cuda_buffer.get().context())
-
-    def slice(self, offset=0, length=None):
-        """Return slice of device buffer
-
-        Parameters
-        ----------
-        offset : int, default 0
-          Specify offset from the start of device buffer to slice
-        length : int, default None
-          Specify the length of slice (default is until end of device
-          buffer starting from offset). If the length is larger than
-          the data available, the returned slice will have a size of
-          the available data starting from the offset.
-
-        Returns
-        -------
-        sliced : CudaBuffer
-          Zero-copy slice of device buffer.
-
-        """
-        if offset < 0 or (self.size and offset >= self.size):
-            raise ValueError('offset argument is out-of-range')
-        cdef int64_t offset_ = offset
-        cdef int64_t size
-        if length is None:
-            size = self.size - offset_
-        elif offset + length <= self.size:
-            size = length
-        else:
-            size = self.size - offset
-        parent = pyarrow_unwrap_cudabuffer(self)
-        return pyarrow_wrap_cudabuffer(make_shared[CCudaBuffer](parent,
-                                                                offset_, size))
-
-    def to_pybytes(self):
-        """Return device buffer content as Python bytes.
-        """
-        return self.copy_to_host().to_pybytes()
-
-    def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
-        # Device buffer contains data pointers on the device. Hence,
-        # cannot support buffer protocol PEP-3118 for CudaBuffer.
-        raise BufferError('buffer protocol for device buffer not supported')
-
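A minimal sketch of the host/device copy methods documented above, again assuming a CUDA-enabled build.

    import numpy as np
    import pyarrow.cuda as cuda

    ctx = cuda.Context(0)
    dbuf = ctx.new_buffer(8)
    dbuf.copy_from_host(np.arange(8, dtype=np.uint8), position=0, nbytes=8)
    host = dbuf.copy_to_host()                   # plain pyarrow.Buffer on CPU
    assert host.to_pybytes() == bytes(range(8))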
-
-cdef class HostBuffer(Buffer):
-    """Device-accessible CPU memory created using cudaHostAlloc.
-
-    To create a HostBuffer instance, use
-
-      cuda.new_host_buffer(<nbytes>)
-    """
-
-    def __init__(self):
-        raise TypeError("Do not call HostBuffer's constructor directly,"
-                        " use `cuda.new_host_buffer` function instead.")
-
-    cdef void init_host(self, const shared_ptr[CCudaHostBuffer]& buffer):
-        self.host_buffer = buffer
-        self.init(<shared_ptr[CBuffer]> buffer)
-
-    @property
-    def size(self):
-        return self.host_buffer.get().size()
-
-
-cdef class BufferReader(NativeFile):
-    """File interface for zero-copy read from CUDA buffers.
-
-    Note: Read methods return pointers to device memory. This means
-    you must be careful using this interface with any Arrow code which
-    may expect to be able to do anything other than pointer arithmetic
-    on the returned buffers.
-    """
-
-    def __cinit__(self, CudaBuffer obj):
-        self.buffer = obj
-        self.reader = new CCudaBufferReader(self.buffer.buffer)
-        self.set_random_access_file(
-            shared_ptr[CRandomAccessFile](self.reader))
-        self.is_readable = True
-
-    def read_buffer(self, nbytes=None):
-        """Return a slice view of the underlying device buffer.
-
-        The slice will start at the current reader position and will
-        have specified size in bytes.
-
-        Parameters
-        ----------
-        nbytes : int, default None
-          Specify the number of bytes to read. Default: None (read all
-          remaining bytes).
-
-        Returns
-        -------
-        cbuf : CudaBuffer
-          New device buffer.
-
-        """
-        cdef:
-            int64_t c_nbytes
-            int64_t bytes_read = 0
-            shared_ptr[CCudaBuffer] output
-
-        if nbytes is None:
-            c_nbytes = self.size() - self.tell()
-        else:
-            c_nbytes = nbytes
-
-        with nogil:
-            output = static_pointer_cast[CCudaBuffer, CBuffer](
-                GetResultValue(self.reader.Read(c_nbytes)))
-
-        return pyarrow_wrap_cudabuffer(output)
-
-
-cdef class BufferWriter(NativeFile):
-    """File interface for writing to CUDA buffers.
-
-    By default writes are unbuffered. Set the buffer_size property to
-    enable buffering.
-    """
-
-    def __cinit__(self, CudaBuffer buffer):
-        self.buffer = buffer
-        self.writer = new CCudaBufferWriter(self.buffer.cuda_buffer)
-        self.set_output_stream(shared_ptr[COutputStream](self.writer))
-        self.is_writable = True
-
-    def writeat(self, int64_t position, object data):
-        """Write data to buffer starting from position.
-
-        Parameters
-        ----------
-        position : int
-          Specify device buffer position where the data will be
-          written.
-        data : array-like
-          Specify data, the data instance must implement buffer
-          protocol.
-        """
-        cdef:
-            Buffer buf = as_buffer(data)
-            const uint8_t* c_data = buf.buffer.get().data()
-            int64_t c_size = buf.buffer.get().size()
-
-        with nogil:
-            check_status(self.writer.WriteAt(position, c_data, c_size))
-
-    def flush(self):
-        """ Flush the buffer stream """
-        with nogil:
-            check_status(self.writer.Flush())
-
-    def seek(self, int64_t position, int whence=0):
-        # TODO: remove this method after NativeFile.seek supports
-        # writable files.
-        cdef int64_t offset
-
-        with nogil:
-            if whence == 0:
-                offset = position
-            elif whence == 1:
-                offset = GetResultValue(self.writer.Tell())
-                offset = offset + position
-            else:
-                with gil:
-                    raise ValueError("Invalid value of whence: {0}"
-                                     .format(whence))
-            check_status(self.writer.Seek(offset))
-        return self.tell()
-
-    @property
-    def buffer_size(self):
-        """Returns size of host (CPU) buffer, 0 for unbuffered
-        """
-        return self.writer.buffer_size()
-
-    @buffer_size.setter
-    def buffer_size(self, int64_t buffer_size):
-        """Set CPU buffer size to limit calls to cudaMemcpy
-
-        Parameters
-        ----------
-        buffer_size : int
-          Specify the size of CPU buffer to allocate in bytes.
-        """
-        with nogil:
-            check_status(self.writer.SetBufferSize(buffer_size))
-
-    @property
-    def num_bytes_buffered(self):
-        """Returns number of bytes buffered on host
-        """
-        return self.writer.num_bytes_buffered()
-
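A minimal sketch of the file-like BufferReader/BufferWriter pair above; write() comes from the NativeFile base class, and a CUDA-enabled build is assumed.

    import pyarrow.cuda as cuda

    ctx = cuda.Context(0)
    dbuf = ctx.new_buffer(128)

    writer = cuda.BufferWriter(dbuf)
    writer.buffer_size = 64                      # enable host-side buffering
    writer.write(b"hello device")
    writer.flush()

    reader = cuda.BufferReader(dbuf)
    view = reader.read_buffer(12)                # CudaBuffer slice, still on GPU
    print(view.copy_to_host().to_pybytes())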
-# Functions
-
-
-def new_host_buffer(const int64_t size, int device=0):
-    """Return buffer with CUDA-accessible memory on CPU host
-
-    Parameters
-    ----------
-    size : int
-      Specify the number of bytes to be allocated.
-    device : int
-      Specify GPU device number.
-
-    Returns
-    -------
-    dbuf : HostBuffer
-      Allocated host buffer
-    """
-    cdef shared_ptr[CCudaHostBuffer] buffer
-    with nogil:
-        buffer = GetResultValue(AllocateCudaHostBuffer(device, size))
-    return pyarrow_wrap_cudahostbuffer(buffer)
-
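For completeness, a short sketch of allocating pinned host memory with new_host_buffer (CUDA build assumed).

    import pyarrow.cuda as cuda

    hbuf = cuda.new_host_buffer(1 << 20)         # 1 MiB, device 0
    print(hbuf.size)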
-
-def serialize_record_batch(object batch, object ctx):
-    """ Write record batch message to GPU device memory
-
-    Parameters
-    ----------
-    batch : RecordBatch
-      Record batch to write
-    ctx : Context
-      CUDA Context to allocate device memory from
-
-    Returns
-    -------
-    dbuf : CudaBuffer
-      device buffer which contains the record batch message
-    """
-    cdef shared_ptr[CCudaBuffer] buffer
-    cdef CRecordBatch* batch_ = pyarrow_unwrap_batch(batch).get()
-    cdef CCudaContext* ctx_ = pyarrow_unwrap_cudacontext(ctx).get()
-    with nogil:
-        buffer = GetResultValue(CudaSerializeRecordBatch(batch_[0], ctx_))
-    return pyarrow_wrap_cudabuffer(buffer)
-
-
-def read_message(object source, pool=None):
-    """ Read Arrow IPC message located on GPU device
-
-    Parameters
-    ----------
-    source : {CudaBuffer, cuda.BufferReader}
-      Device buffer or reader of device buffer.
-    pool : MemoryPool (optional)
-      Pool to allocate CPU memory for the metadata
-
-    Returns
-    -------
-    message : Message
-      The deserialized message, body still on device
-    """
-    cdef:
-        Message result = Message.__new__(Message)
-    cdef CMemoryPool* pool_ = maybe_unbox_memory_pool(pool)
-    if not isinstance(source, BufferReader):
-        reader = BufferReader(source)
-    else:
-        # source is already a reader; use it as-is
-        reader = source
-    with nogil:
-        result.message = move(
-            GetResultValue(ReadMessage(reader.reader, pool_)))
-    return result
-
-
-def read_record_batch(object buffer, object schema, *,
-                      DictionaryMemo dictionary_memo=None, pool=None):
-    """Construct RecordBatch referencing IPC message located on CUDA device.
-
-    While the metadata is copied to host memory for deserialization,
-    the record batch data remains on the device.
-
-    Parameters
-    ----------
-    buffer :
-      Device buffer containing the complete IPC message
-    schema : Schema
-      The schema for the record batch
-    dictionary_memo : DictionaryMemo, optional
-      If message contains dictionaries, must pass a populated
-      DictionaryMemo
-    pool : MemoryPool (optional)
-      Pool to allocate metadata from
-
-    Returns
-    -------
-    batch : RecordBatch
-      Reconstructed record batch, with device pointers
-
-    """
-    cdef:
-        shared_ptr[CSchema] schema_ = pyarrow_unwrap_schema(schema)
-        shared_ptr[CCudaBuffer] buffer_ = pyarrow_unwrap_cudabuffer(buffer)
-        CDictionaryMemo temp_memo
-        CDictionaryMemo* arg_dict_memo
-        CMemoryPool* pool_ = maybe_unbox_memory_pool(pool)
-        shared_ptr[CRecordBatch] batch
-
-    if dictionary_memo is not None:
-        arg_dict_memo = dictionary_memo.memo
-    else:
-        arg_dict_memo = &temp_memo
-
-    with nogil:
-        batch = GetResultValue(CudaReadRecordBatch(
-            schema_, arg_dict_memo, buffer_, pool_))
-    return pyarrow_wrap_batch(batch)
-
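A minimal sketch tying serialize_record_batch and read_record_batch together into a device-side IPC round trip; the column data is arbitrary and a CUDA-enabled build is assumed.

    import pyarrow as pa
    import pyarrow.cuda as cuda

    ctx = cuda.Context(0)
    batch = pa.RecordBatch.from_pydict({"x": [1, 2, 3]})
    dbuf = cuda.serialize_record_batch(batch, ctx)      # IPC message on GPU
    gpu_batch = cuda.read_record_batch(dbuf, batch.schema)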
-
-# Public API
-
-
-cdef public api bint pyarrow_is_buffer(object buffer):
-    return isinstance(buffer, Buffer)
-
-# cudabuffer
-
-cdef public api bint pyarrow_is_cudabuffer(object buffer):
-    return isinstance(buffer, CudaBuffer)
-
-
-cdef public api object \
-        pyarrow_wrap_cudabuffer_base(const shared_ptr[CCudaBuffer]& buf, base):
-    cdef CudaBuffer result = CudaBuffer.__new__(CudaBuffer)
-    result.init_cuda(buf, base)
-    return result
-
-
-cdef public api object \
-        pyarrow_wrap_cudabuffer(const shared_ptr[CCudaBuffer]& buf):
-    cdef CudaBuffer result = CudaBuffer.__new__(CudaBuffer)
-    result.init_cuda(buf, None)
-    return result
-
-
-cdef public api shared_ptr[CCudaBuffer] pyarrow_unwrap_cudabuffer(object obj):
-    if pyarrow_is_cudabuffer(obj):
-        return (<CudaBuffer>obj).cuda_buffer
-    raise TypeError('expected CudaBuffer instance, got %s'
-                    % (type(obj).__name__))
-
-# cudahostbuffer
-
-cdef public api bint pyarrow_is_cudahostbuffer(object buffer):
-    return isinstance(buffer, HostBuffer)
-
-
-cdef public api object \
-        pyarrow_wrap_cudahostbuffer(const shared_ptr[CCudaHostBuffer]& buf):
-    cdef HostBuffer result = HostBuffer.__new__(HostBuffer)
-    result.init_host(buf)
-    return result
-
-
-cdef public api shared_ptr[CCudaHostBuffer] \
-        pyarrow_unwrap_cudahostbuffer(object obj):
-    if pyarrow_is_cudahostbuffer(obj):
-        return (<HostBuffer>obj).host_buffer
-    raise TypeError('expected HostBuffer instance, got %s'
-                    % (type(obj).__name__))
-
-# cudacontext
-
-cdef public api bint pyarrow_is_cudacontext(object ctx):
-    return isinstance(ctx, Context)
-
-
-cdef public api object \
-        pyarrow_wrap_cudacontext(const shared_ptr[CCudaContext]& ctx):
-    cdef Context result = Context.__new__(Context)
-    result.init(ctx)
-    return result
-
-
-cdef public api shared_ptr[CCudaContext] \
-        pyarrow_unwrap_cudacontext(object obj):
-    if pyarrow_is_cudacontext(obj):
-        return (<Context>obj).context
-    raise TypeError('expected Context instance, got %s'
-                    % (type(obj).__name__))
-
-# cudaipcmemhandle
-
-cdef public api bint pyarrow_is_cudaipcmemhandle(object handle):
-    return isinstance(handle, IpcMemHandle)
-
-
-cdef public api object \
-        pyarrow_wrap_cudaipcmemhandle(shared_ptr[CCudaIpcMemHandle]& h):
-    cdef IpcMemHandle result = IpcMemHandle.__new__(IpcMemHandle)
-    result.init(h)
-    return result
-
-
-cdef public api shared_ptr[CCudaIpcMemHandle] \
-        pyarrow_unwrap_cudaipcmemhandle(object obj):
-    if pyarrow_is_cudaipcmemhandle(obj):
-        return (<IpcMemHandle>obj).handle
-    raise TypeError('expected IpcMemHandle instance, got %s'
-                    % (type(obj).__name__))
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
deleted file mode 100644
index 6199428..0000000
--- a/python/pyarrow/_dataset.pyx
+++ /dev/null
@@ -1,2977 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: language_level = 3
-
-"""Dataset is currently unstable. APIs subject to change without notice."""
-
-from cpython.object cimport Py_LT, Py_EQ, Py_GT, Py_LE, Py_NE, Py_GE
-from cython.operator cimport dereference as deref
-
-import collections
-import os
-import warnings
-
-import pyarrow as pa
-from pyarrow.lib cimport *
-from pyarrow.lib import ArrowTypeError, frombytes, tobytes
-from pyarrow.includes.libarrow_dataset cimport *
-from pyarrow._fs cimport FileSystem, FileInfo, FileSelector
-from pyarrow._csv cimport ConvertOptions, ParseOptions, ReadOptions
-from pyarrow.util import _is_iterable, _is_path_like, _stringify_path
-
-from pyarrow._parquet cimport (
-    _create_writer_properties, _create_arrow_writer_properties,
-    FileMetaData, RowGroupMetaData, ColumnChunkMetaData
-)
-
-
-def _forbid_instantiation(klass, subclasses_instead=True):
-    msg = '{} is an abstract class, thus cannot be initialized.'.format(
-        klass.__name__
-    )
-    if subclasses_instead:
-        subclasses = [cls.__name__ for cls in klass.__subclasses__()]
-        msg += ' Use one of the subclasses instead: {}'.format(
-            ', '.join(subclasses)
-        )
-    raise TypeError(msg)
-
-
-cdef CFileSource _make_file_source(object file, FileSystem filesystem=None):
-
-    cdef:
-        CFileSource c_source
-        shared_ptr[CFileSystem] c_filesystem
-        c_string c_path
-        shared_ptr[CRandomAccessFile] c_file
-        shared_ptr[CBuffer] c_buffer
-
-    if isinstance(file, Buffer):
-        c_buffer = pyarrow_unwrap_buffer(file)
-        c_source = CFileSource(move(c_buffer))
-
-    elif _is_path_like(file):
-        if filesystem is None:
-            raise ValueError("cannot construct a FileSource from "
-                             "a path without a FileSystem")
-        c_filesystem = filesystem.unwrap()
-        c_path = tobytes(_stringify_path(file))
-        c_source = CFileSource(move(c_path), move(c_filesystem))
-
-    elif hasattr(file, 'read'):
-        # Optimistically hope this is file-like
-        c_file = get_native_file(file, False).get_random_access_file()
-        c_source = CFileSource(move(c_file))
-
-    else:
-        raise TypeError("cannot construct a FileSource "
-                        "from " + str(file))
-
-    return c_source
-
-
-cdef class Expression(_Weakrefable):
-    """
-    A logical expression to be evaluated against some input.
-
-    To create an expression:
-
-    - Use the factory function ``pyarrow.dataset.scalar()`` to create a
-      scalar (not necessary when combined, see example below).
-    - Use the factory function ``pyarrow.dataset.field()`` to reference
-      a field (column in table).
-    - Compare fields and scalars with ``<``, ``<=``, ``==``, ``>=``, ``>``.
-    - Combine expressions using python operators ``&`` (logical and),
-      ``|`` (logical or) and ``~`` (logical not).
-      Note: python keywords ``and``, ``or`` and ``not`` cannot be used
-      to combine expressions.
-    - Check whether the expression is contained in a list of values with
-      the ``pyarrow.dataset.Expression.isin()`` member function.
-
-    Examples
-    --------
-
-    >>> import pyarrow.dataset as ds
-    >>> (ds.field("a") < ds.scalar(3)) | (ds.field("b") > 7)
-    <pyarrow.dataset.Expression ((a < 3:int64) or (b > 7:int64))>
-    >>> ds.field('a') != 3
-    <pyarrow.dataset.Expression (a != 3)>
-    >>> ds.field('a').isin([1, 2, 3])
-    <pyarrow.dataset.Expression (a is in [
-      1,
-      2,
-      3
-    ])>
-    """
-    cdef:
-        CExpression expr
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__)
-
-    cdef void init(self, const CExpression& sp):
-        self.expr = sp
-
-    @staticmethod
-    cdef wrap(const CExpression& sp):
-        cdef Expression self = Expression.__new__(Expression)
-        self.init(sp)
-        return self
-
-    cdef inline CExpression unwrap(self):
-        return self.expr
-
-    def equals(self, Expression other):
-        return self.expr.Equals(other.unwrap())
-
-    def __str__(self):
-        return frombytes(self.expr.ToString())
-
-    def __repr__(self):
-        return "<pyarrow.dataset.{0} {1}>".format(
-            self.__class__.__name__, str(self)
-        )
-
-    @staticmethod
-    def _deserialize(Buffer buffer not None):
-        return Expression.wrap(GetResultValue(CDeserializeExpression(
-            pyarrow_unwrap_buffer(buffer))))
-
-    def __reduce__(self):
-        buffer = pyarrow_wrap_buffer(GetResultValue(
-            CSerializeExpression(self.expr)))
-        return Expression._deserialize, (buffer,)
-
-    @staticmethod
-    cdef Expression _expr_or_scalar(object expr):
-        if isinstance(expr, Expression):
-            return (<Expression> expr)
-        return (<Expression> Expression._scalar(expr))
-
-    @staticmethod
-    cdef Expression _call(str function_name, list arguments,
-                          shared_ptr[CFunctionOptions] options=(
-                              <shared_ptr[CFunctionOptions]> nullptr)):
-        cdef:
-            vector[CExpression] c_arguments
-
-        for argument in arguments:
-            c_arguments.push_back((<Expression> argument).expr)
-
-        return Expression.wrap(CMakeCallExpression(tobytes(function_name),
-                                                   move(c_arguments), options))
-
-    def __richcmp__(self, other, int op):
-        other = Expression._expr_or_scalar(other)
-        return Expression._call({
-            Py_EQ: "equal",
-            Py_NE: "not_equal",
-            Py_GT: "greater",
-            Py_GE: "greater_equal",
-            Py_LT: "less",
-            Py_LE: "less_equal",
-        }[op], [self, other])
-
-    def __bool__(self):
-        raise ValueError(
-            "An Expression cannot be evaluated to python True or False. "
-            "If you are using the 'and', 'or' or 'not' operators, use '&', "
-            "'|' or '~' instead."
-        )
-
-    def __invert__(self):
-        return Expression._call("invert", [self])
-
-    def __and__(Expression self, other):
-        other = Expression._expr_or_scalar(other)
-        return Expression._call("and_kleene", [self, other])
-
-    def __or__(Expression self, other):
-        other = Expression._expr_or_scalar(other)
-        return Expression._call("or_kleene", [self, other])
-
-    def __add__(Expression self, other):
-        other = Expression._expr_or_scalar(other)
-        return Expression._call("add_checked", [self, other])
-
-    def __mul__(Expression self, other):
-        other = Expression._expr_or_scalar(other)
-        return Expression._call("multiply_checked", [self, other])
-
-    def __sub__(Expression self, other):
-        other = Expression._expr_or_scalar(other)
-        return Expression._call("subtract_checked", [self, other])
-
-    def __truediv__(Expression self, other):
-        other = Expression._expr_or_scalar(other)
-        return Expression._call("divide_checked", [self, other])
-
-    def is_valid(self):
-        """Checks whether the expression is not-null (valid)"""
-        return Expression._call("is_valid", [self])
-
-    def is_null(self):
-        """Checks whether the expression is null"""
-        return Expression._call("is_null", [self])
-
-    def cast(self, type, bint safe=True):
-        """Explicitly change the expression's data type"""
-        cdef shared_ptr[CCastOptions] c_options
-        c_options.reset(new CCastOptions(safe))
-        c_options.get().to_type = pyarrow_unwrap_data_type(ensure_type(type))
-        return Expression._call("cast", [self],
-                                <shared_ptr[CFunctionOptions]> c_options)
-
-    def isin(self, values):
-        """Checks whether the expression is contained in values"""
-        cdef:
-            shared_ptr[CFunctionOptions] c_options
-            CDatum c_values
-
-        if not isinstance(values, pa.Array):
-            values = pa.array(values)
-
-        c_values = CDatum(pyarrow_unwrap_array(values))
-        c_options.reset(new CSetLookupOptions(c_values, True))
-        return Expression._call("is_in", [self], c_options)
-
-    @staticmethod
-    def _field(str name not None):
-        return Expression.wrap(CMakeFieldExpression(tobytes(name)))
-
-    @staticmethod
-    def _scalar(value):
-        cdef:
-            Scalar scalar
-
-        if isinstance(value, Scalar):
-            scalar = value
-        else:
-            scalar = pa.scalar(value)
-
-        return Expression.wrap(CMakeScalarExpression(scalar.unwrap()))
-
-
-_deserialize = Expression._deserialize
-cdef Expression _true = Expression._scalar(True)
-
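In addition to the comparison examples in the docstring above, a short sketch combining cast() and isin(); the column names are assumptions.

    import pyarrow.dataset as ds

    expr = ((ds.field("year").cast("int32") >= 2020)
            & ds.field("tag").isin(["a", "b"]))
    print(expr)                                  # printable, picklable filter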
-
-cdef class Dataset(_Weakrefable):
-    """
-    Collection of data fragments and potentially child datasets.
-
-    Arrow Datasets allow you to query against data that has been split across
-    multiple files. This sharding of data may indicate partitioning, which
-    can accelerate queries that only touch some partitions (files).
-    """
-
-    cdef:
-        shared_ptr[CDataset] wrapped
-        CDataset* dataset
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__)
-
-    cdef void init(self, const shared_ptr[CDataset]& sp):
-        self.wrapped = sp
-        self.dataset = sp.get()
-
-    @staticmethod
-    cdef wrap(const shared_ptr[CDataset]& sp):
-        type_name = frombytes(sp.get().type_name())
-
-        classes = {
-            'union': UnionDataset,
-            'filesystem': FileSystemDataset,
-        }
-
-        class_ = classes.get(type_name, None)
-        if class_ is None:
-            raise TypeError(type_name)
-
-        cdef Dataset self = class_.__new__(class_)
-        self.init(sp)
-        return self
-
-    cdef shared_ptr[CDataset] unwrap(self) nogil:
-        return self.wrapped
-
-    @property
-    def partition_expression(self):
-        """
-        An Expression which evaluates to true for all data viewed by this
-        Dataset.
-        """
-        return Expression.wrap(self.dataset.partition_expression())
-
-    def replace_schema(self, Schema schema not None):
-        """
-        Return a copy of this Dataset with a different schema.
-
-        The copy will view the same Fragments. If the new schema is not
-        compatible with the original dataset's schema then an error will
-        be raised.
-        """
-        cdef shared_ptr[CDataset] copy = GetResultValue(
-            self.dataset.ReplaceSchema(pyarrow_unwrap_schema(schema)))
-        return Dataset.wrap(move(copy))
-
-    def get_fragments(self, Expression filter=None):
-        """Returns an iterator over the fragments in this dataset.
-
-        Parameters
-        ----------
-        filter : Expression, default None
-            Return fragments matching the optional filter, either using the
-            partition_expression or internal information like Parquet's
-            statistics.
-
-        Returns
-        -------
-        fragments : iterator of Fragment
-        """
-        cdef:
-            CExpression c_filter
-            CFragmentIterator c_iterator
-
-        if filter is None:
-            c_fragments = move(GetResultValue(self.dataset.GetFragments()))
-        else:
-            c_filter = _bind(filter, self.schema)
-            c_fragments = move(GetResultValue(
-                self.dataset.GetFragments(c_filter)))
-
-        for maybe_fragment in c_fragments:
-            yield Fragment.wrap(GetResultValue(move(maybe_fragment)))
-
-    def _scanner(self, **kwargs):
-        return Scanner.from_dataset(self, **kwargs)
-
-    def scan(self, **kwargs):
-        """Builds a scan operation against the dataset.
-
-        It produces a stream of ScanTasks, each of which is meant to be a
-        unit of work to be dispatched. The tasks are not executed
-        automatically; the user is responsible for executing and dispatching
-        the individual tasks, so custom local task scheduling can be
-        implemented.
-
-        .. deprecated:: 4.0.0
-           Use `to_batches` instead.
-
-        Parameters
-        ----------
-        columns : list of str, default None
-            The columns to project. This can be a list of column names to
-            include (order and duplicates will be preserved), or a dictionary
-            with {new_column_name: expression} values for more advanced
-            projections.
-            The columns will be passed down to Datasets and corresponding data
-            fragments to avoid loading, copying, and deserializing columns
-            that will not be required further down the compute chain.
-            By default all of the available columns are projected. Raises
-            an exception if any of the referenced column names does not exist
-            in the dataset's Schema.
-        filter : Expression, default None
-            Scan will return only the rows matching the filter.
-            If possible the predicate will be pushed down to exploit the
-            partition information or internal metadata found in the data
-            source, e.g. Parquet statistics. Otherwise filters the loaded
-            RecordBatches before yielding them.
-        batch_size : int, default 1M
-            The maximum row count for scanned record batches. If scanned
-            record batches are overflowing memory then this value can be
-            lowered to reduce their size.
-        use_threads : bool, default True
-            If enabled, maximum parallelism will be used, as determined by
-            the number of available CPU cores.
-        memory_pool : MemoryPool, default None
-            For memory allocations, if required. If not specified, uses the
-            default pool.
-        fragment_scan_options : FragmentScanOptions, default None
-            Options specific to a particular scan and fragment type, which
-            can change between different scans of the same dataset.
-
-        Returns
-        -------
-        scan_tasks : iterator of ScanTask
-
-        Examples
-        --------
-        >>> import pyarrow.dataset as ds
-        >>> dataset = ds.dataset("path/to/dataset")
-
-        Selecting a subset of the columns:
-
-        >>> dataset.scan(columns=["A", "B"])
-
-        Projecting selected columns using an expression:
-
-        >>> dataset.scan(columns={"A_int": ds.field("A").cast("int64")})
-
-        Filtering rows while scanning:
-
-        >>> dataset.scan(filter=ds.field("A") > 0)
-        """
-        return self._scanner(**kwargs).scan()
-
-    def to_batches(self, **kwargs):
-        """Read the dataset as materialized record batches.
-
-        Builds a scan operation against the dataset and sequentially executes
-        the ScanTasks as the returned generator gets consumed.
-
-        See scan method parameters documentation.
-
-        Returns
-        -------
-        record_batches : iterator of RecordBatch
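-
-        Examples
-        --------
-        A minimal sketch, assuming a dataset at the hypothetical path
-        "path/to/dataset" with a column named "A":
-
-        >>> import pyarrow.dataset as ds
-        >>> dataset = ds.dataset("path/to/dataset")
-        >>> for batch in dataset.to_batches(columns=["A"],
-        ...                                 filter=ds.field("A") > 0):
-        ...     print(batch.num_rows)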
-        """
-        return self._scanner(**kwargs).to_batches()
-
-    def to_table(self, **kwargs):
-        """Read the dataset to an arrow table.
-
-        Note that this method reads all the selected data from the dataset
-        into memory.
-
-        See scan method parameters documentation.
-
-        Returns
-        -------
-        table : Table instance
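-
-        Examples
-        --------
-        A minimal sketch, assuming the same hypothetical dataset and column
-        name as above:
-
-        >>> import pyarrow.dataset as ds
-        >>> dataset = ds.dataset("path/to/dataset")
-        >>> table = dataset.to_table(columns=["A"], filter=ds.field("A") > 0)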
-        """
-        return self._scanner(**kwargs).to_table()
-
-    def head(self, int num_rows, **kwargs):
-        """Load the first N rows of the dataset.
-
-        See scan method parameters documentation.
-
-        Returns
-        -------
-        table : Table instance
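-
-        Examples
-        --------
-        A minimal sketch, assuming the same hypothetical dataset as above:
-
-        >>> dataset.head(10, columns=["A"])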
-        """
-        return self._scanner(**kwargs).head(num_rows)
-
-    @property
-    def schema(self):
-        """The common schema of the full Dataset"""
-        return pyarrow_wrap_schema(self.dataset.schema())
-
-
-cdef class InMemoryDataset(Dataset):
-    """A Dataset wrapping in-memory data.
-
-    Parameters
-    ----------
-    source
-        The data for this dataset. Can be a RecordBatch, Table, list of
-        RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader.
-        If an iterable is provided, the schema must also be provided.
-    schema : Schema, optional
-        Only required if passing an iterable as the source.
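-
-    Examples
-    --------
-    A minimal sketch constructing an in-memory dataset from a Table
-    (assuming InMemoryDataset is imported from pyarrow.dataset):
-
-    >>> import pyarrow as pa
-    >>> table = pa.table({"a": [1, 2, 3]})
-    >>> dataset = InMemoryDataset(table)
-    >>> dataset.to_table().num_rows
-    3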
-    """
-
-    cdef:
-        CInMemoryDataset* in_memory_dataset
-
-    def __init__(self, source, Schema schema=None):
-        cdef:
-            RecordBatchReader reader
-            shared_ptr[CInMemoryDataset] in_memory_dataset
-
-        if isinstance(source, (pa.RecordBatch, pa.Table)):
-            source = [source]
-
-        if isinstance(source, (list, tuple)):
-            batches = []
-            for item in source:
-                if isinstance(item, pa.RecordBatch):
-                    batches.append(item)
-                elif isinstance(item, pa.Table):
-                    batches.extend(item.to_batches())
-                else:
-                    raise TypeError(
-                        'Expected a list of tables or batches. The given list '
-                        'contains a ' + type(item).__name__)
-                if schema is None:
-                    schema = item.schema
-                elif not schema.equals(item.schema):
-                    raise ArrowTypeError(
-                        f'Item has schema\n{item.schema}\nwhich does not '
-                        f'match expected schema\n{schema}')
-            if not batches and schema is None:
-                raise ValueError('Must provide schema to construct in-memory '
-                                 'dataset from an empty list')
-            table = pa.Table.from_batches(batches, schema=schema)
-            in_memory_dataset = make_shared[CInMemoryDataset](
-                pyarrow_unwrap_table(table))
-        elif isinstance(source, pa.ipc.RecordBatchReader):
-            reader = source
-            in_memory_dataset = make_shared[CInMemoryDataset](reader.reader)
-        elif _is_iterable(source):
-            if schema is None:
-                raise ValueError('Must provide schema to construct in-memory '
-                                 'dataset from an iterable')
-            reader = pa.ipc.RecordBatchReader.from_batches(schema, source)
-            in_memory_dataset = make_shared[CInMemoryDataset](reader.reader)
-        else:
-            raise TypeError(
-                'Expected a table, batch, iterable of tables/batches, or a '
-                'record batch reader instead of the given type: ' +
-                type(source).__name__
-            )
-
-        self.init(<shared_ptr[CDataset]> in_memory_dataset)
-
-    cdef void init(self, const shared_ptr[CDataset]& sp):
-        Dataset.init(self, sp)
-        self.in_memory_dataset = <CInMemoryDataset*> sp.get()
-
-
-cdef class UnionDataset(Dataset):
-    """A Dataset wrapping child datasets.
-
-    Children's schemas must agree with the provided schema.
-
-    Parameters
-    ----------
-    schema : Schema
-        A known schema to conform to.
-    children : list of Dataset
-        One or more input children
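-
-    Examples
-    --------
-    A minimal sketch, assuming two in-memory child datasets with identical
-    schemas:
-
-    >>> import pyarrow as pa
-    >>> schema = pa.schema([("a", pa.int64())])
-    >>> children = [InMemoryDataset(pa.table({"a": [1, 2]})),
-    ...             InMemoryDataset(pa.table({"a": [3, 4]}))]
-    >>> union = UnionDataset(schema, children)
-    >>> union.to_table().num_rows
-    4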
-    """
-
-    cdef:
-        CUnionDataset* union_dataset
-
-    def __init__(self, Schema schema not None, children):
-        cdef:
-            Dataset child
-            CDatasetVector c_children
-            shared_ptr[CUnionDataset] union_dataset
-
-        for child in children:
-            c_children.push_back(child.wrapped)
-
-        union_dataset = GetResultValue(CUnionDataset.Make(
-            pyarrow_unwrap_schema(schema), move(c_children)))
-        self.init(<shared_ptr[CDataset]> union_dataset)
-
-    cdef void init(self, const shared_ptr[CDataset]& sp):
-        Dataset.init(self, sp)
-        self.union_dataset = <CUnionDataset*> sp.get()
-
-    def __reduce__(self):
-        return UnionDataset, (self.schema, self.children)
-
-    @property
-    def children(self):
-        cdef CDatasetVector children = self.union_dataset.children()
-        return [Dataset.wrap(children[i]) for i in range(children.size())]
-
-
-cdef class FileSystemDataset(Dataset):
-    """A Dataset of file fragments.
-
-    A FileSystemDataset is composed of one or more FileFragment.
-
-    Parameters
-    ----------
-    fragments : list of Fragment
-        List of fragments to consume.
-    schema : Schema
-        The top-level schema of the Dataset.
-    format : FileFormat
-        File format of the fragments, currently only ParquetFileFormat,
-        IpcFileFormat, and CsvFileFormat are supported.
-    filesystem : FileSystem
-        FileSystem of the fragments.
-    root_partition : Expression, optional
-        The top-level partition of the Dataset.
-    """
-
-    cdef:
-        CFileSystemDataset* filesystem_dataset
-
-    def __init__(self, fragments, Schema schema, FileFormat format,
-                 FileSystem filesystem=None, root_partition=None):
-        cdef:
-            FileFragment fragment=None
-            vector[shared_ptr[CFileFragment]] c_fragments
-            CResult[shared_ptr[CDataset]] result
-            shared_ptr[CFileSystem] c_filesystem
-
-        if root_partition is None:
-            root_partition = _true
-        elif not isinstance(root_partition, Expression):
-            raise TypeError(
-                "Argument 'root_partition' has incorrect type (expected "
-                "Epression, got {0})".format(type(root_partition))
-            )
-
-        for fragment in fragments:
-            c_fragments.push_back(
-                static_pointer_cast[CFileFragment, CFragment](
-                    fragment.unwrap()))
-
-            if filesystem is None:
-                filesystem = fragment.filesystem
-
-        if filesystem is not None:
-            c_filesystem = filesystem.unwrap()
-
-        result = CFileSystemDataset.Make(
-            pyarrow_unwrap_schema(schema),
-            (<Expression> root_partition).unwrap(),
-            format.unwrap(),
-            c_filesystem,
-            c_fragments
-        )
-        self.init(GetResultValue(result))
-
-    @property
-    def filesystem(self):
-        return FileSystem.wrap(self.filesystem_dataset.filesystem())
-
-    cdef void init(self, const shared_ptr[CDataset]& sp):
-        Dataset.init(self, sp)
-        self.filesystem_dataset = <CFileSystemDataset*> sp.get()
-
-    def __reduce__(self):
-        return FileSystemDataset, (
-            list(self.get_fragments()),
-            self.schema,
-            self.format,
-            self.filesystem,
-            self.partition_expression
-        )
-
-    @classmethod
-    def from_paths(cls, paths, schema=None, format=None,
-                   filesystem=None, partitions=None, root_partition=None):
-        """A Dataset created from a list of paths on a particular filesystem.
-
-        Parameters
-        ----------
-        paths : list of str
-            List of file paths to create the fragments from.
-        schema : Schema
-            The top-level schema of the Dataset.
-        format : FileFormat
-            File format to create fragments from, currently only
-            ParquetFileFormat, IpcFileFormat, and CsvFileFormat are supported.
-        filesystem : FileSystem
-            The filesystem which files are from.
-        partitions : List[Expression], optional
-            Attach additional partition information for the file paths.
-        root_partition : Expression, optional
-            The top-level partition of the Dataset.
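-
-        Examples
-        --------
-        A minimal sketch; the paths, schema, and column name are
-        hypothetical:
-
-        >>> import pyarrow as pa
-        >>> import pyarrow.dataset as ds
-        >>> from pyarrow import fs
-        >>> dataset = FileSystemDataset.from_paths(
-        ...     ["data/part-0.parquet", "data/part-1.parquet"],
-        ...     schema=pa.schema([("a", pa.int64())]),
-        ...     format=ds.ParquetFileFormat(),
-        ...     filesystem=fs.LocalFileSystem())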
-        """
-        cdef:
-            FileFragment fragment
-
-        if root_partition is None:
-            root_partition = _true
-
-        for arg, class_, name in [
-            (schema, Schema, 'schema'),
-            (format, FileFormat, 'format'),
-            (filesystem, FileSystem, 'filesystem'),
-            (root_partition, Expression, 'root_partition')
-        ]:
-            if not isinstance(arg, class_):
-                raise TypeError(
-                    "Argument '{0}' has incorrect type (expected {1}, "
-                    "got {2})".format(name, class_.__name__, type(arg))
-                )
-
-        partitions = partitions or [_true] * len(paths)
-
-        if len(paths) != len(partitions):
-            raise ValueError(
-                'The number of files resulting from paths_or_selector '
-                'must be equal to the number of partitions.'
-            )
-
-        fragments = [
-            format.make_fragment(path, filesystem, partitions[i])
-            for i, path in enumerate(paths)
-        ]
-        return FileSystemDataset(fragments, schema, format,
-                                 filesystem, root_partition)
-
-    @property
-    def files(self):
-        """List of the files"""
-        cdef vector[c_string] files = self.filesystem_dataset.files()
-        return [frombytes(f) for f in files]
-
-    @property
-    def format(self):
-        """The FileFormat of this source."""
-        return FileFormat.wrap(self.filesystem_dataset.format())
-
-
-cdef CExpression _bind(Expression filter, Schema schema) except *:
-    assert schema is not None
-
-    if filter is None:
-        return _true.unwrap()
-
-    return GetResultValue(filter.unwrap().Bind(
-        deref(pyarrow_unwrap_schema(schema).get())))
-
-
-cdef class FileWriteOptions(_Weakrefable):
-
-    cdef:
-        shared_ptr[CFileWriteOptions] wrapped
-        CFileWriteOptions* options
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__)
-
-    cdef void init(self, const shared_ptr[CFileWriteOptions]& sp):
-        self.wrapped = sp
-        self.options = sp.get()
-
-    @staticmethod
-    cdef wrap(const shared_ptr[CFileWriteOptions]& sp):
-        type_name = frombytes(sp.get().type_name())
-
-        classes = {
-            'ipc': IpcFileWriteOptions,
-            'parquet': ParquetFileWriteOptions,
-        }
-
-        class_ = classes.get(type_name, None)
-        if class_ is None:
-            raise TypeError(type_name)
-
-        cdef FileWriteOptions self = class_.__new__(class_)
-        self.init(sp)
-        return self
-
-    @property
-    def format(self):
-        return FileFormat.wrap(self.options.format())
-
-    cdef inline shared_ptr[CFileWriteOptions] unwrap(self):
-        return self.wrapped
-
-
-cdef class FileFormat(_Weakrefable):
-
-    cdef:
-        shared_ptr[CFileFormat] wrapped
-        CFileFormat* format
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__)
-
-    cdef void init(self, const shared_ptr[CFileFormat]& sp):
-        self.wrapped = sp
-        self.format = sp.get()
-
-    @staticmethod
-    cdef wrap(const shared_ptr[CFileFormat]& sp):
-        type_name = frombytes(sp.get().type_name())
-
-        classes = {
-            'ipc': IpcFileFormat,
-            'csv': CsvFileFormat,
-            'parquet': ParquetFileFormat,
-        }
-
-        class_ = classes.get(type_name, None)
-        if class_ is None:
-            raise TypeError(type_name)
-
-        cdef FileFormat self = class_.__new__(class_)
-        self.init(sp)
-        return self
-
-    cdef inline shared_ptr[CFileFormat] unwrap(self):
-        return self.wrapped
-
-    def inspect(self, file, filesystem=None):
-        """Infer the schema of a file."""
-        c_source = _make_file_source(file, filesystem)
-        c_schema = GetResultValue(self.format.Inspect(c_source))
-        return pyarrow_wrap_schema(move(c_schema))
-
-    def make_fragment(self, file, filesystem=None,
-                      Expression partition_expression=None):
-        """
-        Make a FileFragment of this FileFormat. The filter may not reference
-        fields absent from the provided schema. If no schema is provided then
-        one will be inferred.
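-
-        Examples
-        --------
-        A minimal sketch, assuming a local Parquet file at the hypothetical
-        path "data/file.parquet":
-
-        >>> import pyarrow.dataset as ds
-        >>> fmt = ds.ParquetFileFormat()
-        >>> fragment = fmt.make_fragment("data/file.parquet")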
-        """
-        if partition_expression is None:
-            partition_expression = _true
-
-        c_source = _make_file_source(file, filesystem)
-        c_fragment = <shared_ptr[CFragment]> GetResultValue(
-            self.format.MakeFragment(move(c_source),
-                                     partition_expression.unwrap(),
-                                     <shared_ptr[CSchema]>nullptr))
-        return Fragment.wrap(move(c_fragment))
-
-    def make_write_options(self):
-        return FileWriteOptions.wrap(self.format.DefaultWriteOptions())
-
-    @property
-    def default_extname(self):
-        return frombytes(self.format.type_name())
-
-    @property
-    def default_fragment_scan_options(self):
-        return FragmentScanOptions.wrap(
-            self.wrapped.get().default_fragment_scan_options)
-
-    @default_fragment_scan_options.setter
-    def default_fragment_scan_options(self, FragmentScanOptions options):
-        if options is None:
-            self.wrapped.get().default_fragment_scan_options =\
-                <shared_ptr[CFragmentScanOptions]>nullptr
-        else:
-            self._set_default_fragment_scan_options(options)
-
-    cdef _set_default_fragment_scan_options(self, FragmentScanOptions options):
-        raise ValueError(f"Cannot set fragment scan options for "
-                         f"'{options.type_name}' on {self.__class__.__name__}")
-
-    def __eq__(self, other):
-        try:
-            return self.equals(other)
-        except TypeError:
-            return False
-
-
-cdef class Fragment(_Weakrefable):
-    """Fragment of data from a Dataset."""
-
-    cdef:
-        shared_ptr[CFragment] wrapped
-        CFragment* fragment
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__)
-
-    cdef void init(self, const shared_ptr[CFragment]& sp):
-        self.wrapped = sp
-        self.fragment = sp.get()
-
-    @staticmethod
-    cdef wrap(const shared_ptr[CFragment]& sp):
-        type_name = frombytes(sp.get().type_name())
-
-        classes = {
-            # IpcFileFormat and CsvFileFormat do not have corresponding
-            # subclasses of FileFragment
-            'ipc': FileFragment,
-            'csv': FileFragment,
-            'parquet': ParquetFileFragment,
-        }
-
-        class_ = classes.get(type_name, None)
-        if class_ is None:
-            class_ = Fragment
-
-        cdef Fragment self = class_.__new__(class_)
-        self.init(sp)
-        return self
-
-    cdef inline shared_ptr[CFragment] unwrap(self):
-        return self.wrapped
-
-    @property
-    def physical_schema(self):
-        """Return the physical schema of this Fragment. This schema can be
-        different from the dataset read schema."""
-        cdef:
-            shared_ptr[CSchema] c_schema
-
-        c_schema = GetResultValue(self.fragment.ReadPhysicalSchema())
-        return pyarrow_wrap_schema(c_schema)
-
-    @property
-    def partition_expression(self):
-        """An Expression which evaluates to true for all data viewed by this
-        Fragment.
-        """
-        return Expression.wrap(self.fragment.partition_expression())
-
-    def _scanner(self, **kwargs):
-        return Scanner.from_fragment(self, **kwargs)
-
-    def scan(self, Schema schema=None, **kwargs):
-        """Builds a scan operation against the dataset.
-
-        It produces a stream of ScanTasks, each of which is meant to be a unit
-        of work to be dispatched. The tasks are not executed automatically;
-        the user is responsible for executing and dispatching the individual
-        tasks, so that custom local task scheduling can be implemented.
-
-        .. deprecated:: 4.0.0
-           Use `to_batches` instead.
-
-        Parameters
-        ----------
-        schema : Schema
-            Schema to use for scanning. This is used to unify a Fragment to
-            its Dataset's schema. If not specified, this will use the
-            Fragment's physical schema, which might differ for each Fragment.
-        columns : list of str, default None
-            The columns to project. This can be a list of column names to
-            include (order and duplicates will be preserved), or a dictionary
-            with {new_column_name: expression} values for more advanced
-            projections.
-            The columns will be passed down to Datasets and corresponding data
-            fragments to avoid loading, copying, and deserializing columns
-            that will not be required further down the compute chain.
-            By default all of the available columns are projected. Raises
-            an exception if any of the referenced column names does not exist
-            in the dataset's Schema.
-        filter : Expression, default None
-            Scan will return only the rows matching the filter.
-            If possible the predicate will be pushed down to exploit the
-            partition information or internal metadata found in the data
-            source, e.g. Parquet statistics. Otherwise filters the loaded
-            RecordBatches before yielding them.
-        batch_size : int, default 1M
-            The maximum row count for scanned record batches. If scanned
-            record batches are overflowing memory then this value can be
-            lowered to reduce their size.
-        use_threads : bool, default True
-            If enabled, maximum parallelism will be used, as determined by
-            the number of available CPU cores.
-        memory_pool : MemoryPool, default None
-            For memory allocations, if required. If not specified, uses the
-            default pool.
-        fragment_scan_options : FragmentScanOptions, default None
-            Options specific to a particular scan and fragment type, which
-            can change between different scans of the same dataset.
-
-        Returns
-        -------
-        scan_tasks : iterator of ScanTask
-        """
-        return self._scanner(schema=schema, **kwargs).scan()
-
-    def to_batches(self, Schema schema=None, **kwargs):
-        """Read the fragment as materialized record batches.
-
-        See scan method parameters documentation.
-
-        Returns
-        -------
-        record_batches : iterator of RecordBatch
-        """
-        return self._scanner(schema=schema, **kwargs).to_batches()
-
-    def to_table(self, Schema schema=None, **kwargs):
-        """Convert this Fragment into a Table.
-
-        Use this convenience utility with care. This will serially materialize
-        the Scan result in memory before creating the Table.
-
-        See scan method parameters documentation.
-
-        Returns
-        -------
-        table : Table
-        """
-        return self._scanner(schema=schema, **kwargs).to_table()
-
-    def head(self, int num_rows, **kwargs):
-        """Load the first N rows of the fragment.
-
-        See scan method parameters documentation.
-
-        Returns
-        -------
-        table : Table instance
-        """
-        return self._scanner(**kwargs).head(num_rows)
-
-
-cdef class FileFragment(Fragment):
-    """A Fragment representing a data file."""
-
-    cdef:
-        CFileFragment* file_fragment
-
-    cdef void init(self, const shared_ptr[CFragment]& sp):
-        Fragment.init(self, sp)
-        self.file_fragment = <CFileFragment*> sp.get()
-
-    def __reduce__(self):
-        buffer = self.buffer
-        return self.format.make_fragment, (
-            self.path if buffer is None else buffer,
-            self.filesystem,
-            self.partition_expression
-        )
-
-    @property
-    def path(self):
-        """
-        The path of the data file viewed by this fragment, if it views a
-        file. If instead it views a buffer, this will be "<Buffer>".
-        """
-        return frombytes(self.file_fragment.source().path())
-
-    @property
-    def filesystem(self):
-        """
-        The FileSystem containing the data file viewed by this fragment, if
-        it views a file. If instead it views a buffer, this will be None.
-        """
-        cdef:
-            shared_ptr[CFileSystem] c_fs
-        c_fs = self.file_fragment.source().filesystem()
-
-        if c_fs.get() == nullptr:
-            return None
-
-        return FileSystem.wrap(c_fs)
-
-    @property
-    def buffer(self):
-        """
-        The buffer viewed by this fragment, if it views a buffer. If
-        instead it views a file, this will be None.
-        """
-        cdef:
-            shared_ptr[CBuffer] c_buffer
-        c_buffer = self.file_fragment.source().buffer()
-
-        if c_buffer.get() == nullptr:
-            return None
-
-        return pyarrow_wrap_buffer(c_buffer)
-
-    @property
-    def format(self):
-        """
-        The format of the data file viewed by this fragment.
-        """
-        return FileFormat.wrap(self.file_fragment.format())
-
-
-class RowGroupInfo:
-    """A wrapper class for RowGroup information"""
-
-    def __init__(self, id, metadata, schema):
-        self.id = id
-        self.metadata = metadata
-        self.schema = schema
-
-    @property
-    def num_rows(self):
-        return self.metadata.num_rows
-
-    @property
-    def total_byte_size(self):
-        return self.metadata.total_byte_size
-
-    @property
-    def statistics(self):
-        def name_stats(i):
-            col = self.metadata.column(i)
-
-            stats = col.statistics
-            if stats is None or not stats.has_min_max:
-                return None, None
-
-            name = col.path_in_schema
-            field_index = self.schema.get_field_index(name)
-            if field_index < 0:
-                return None, None
-
-            typ = self.schema.field(field_index).type
-            return col.path_in_schema, {
-                'min': pa.scalar(stats.min, type=typ).as_py(),
-                'max': pa.scalar(stats.max, type=typ).as_py()
-            }
-
-        return {
-            name: stats for name, stats
-            in map(name_stats, range(self.metadata.num_columns))
-            if stats is not None
-        }
-
-    def __repr__(self):
-        return "RowGroupInfo({})".format(self.id)
-
-    def __eq__(self, other):
-        if isinstance(other, int):
-            return self.id == other
-        if not isinstance(other, RowGroupInfo):
-            return False
-        return self.id == other.id
-
-
-cdef class FragmentScanOptions(_Weakrefable):
-    """Scan options specific to a particular fragment and scan operation."""
-
-    cdef:
-        shared_ptr[CFragmentScanOptions] wrapped
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__)
-
-    cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp):
-        self.wrapped = sp
-
-    @staticmethod
-    cdef wrap(const shared_ptr[CFragmentScanOptions]& sp):
-        if not sp:
-            return None
-
-        type_name = frombytes(sp.get().type_name())
-
-        classes = {
-            'csv': CsvFragmentScanOptions,
-            'parquet': ParquetFragmentScanOptions,
-        }
-
-        class_ = classes.get(type_name, None)
-        if class_ is None:
-            raise TypeError(type_name)
-
-        cdef FragmentScanOptions self = class_.__new__(class_)
-        self.init(sp)
-        return self
-
-    @property
-    def type_name(self):
-        return frombytes(self.wrapped.get().type_name())
-
-    def __eq__(self, other):
-        try:
-            return self.equals(other)
-        except TypeError:
-            return False
-
-
-cdef class ParquetFileFragment(FileFragment):
-    """A Fragment representing a parquet file."""
-
-    cdef:
-        CParquetFileFragment* parquet_file_fragment
-
-    cdef void init(self, const shared_ptr[CFragment]& sp):
-        FileFragment.init(self, sp)
-        self.parquet_file_fragment = <CParquetFileFragment*> sp.get()
-
-    def __reduce__(self):
-        buffer = self.buffer
-        row_groups = [row_group.id for row_group in self.row_groups]
-        return self.format.make_fragment, (
-            self.path if buffer is None else buffer,
-            self.filesystem,
-            self.partition_expression,
-            row_groups
-        )
-
-    def ensure_complete_metadata(self):
-        """
-        Ensure that all metadata (statistics, physical schema, ...) have
-        been read and cached in this fragment.
-        """
-        check_status(self.parquet_file_fragment.EnsureCompleteMetadata())
-
-    @property
-    def row_groups(self):
-        metadata = self.metadata
-        cdef vector[int] row_groups = self.parquet_file_fragment.row_groups()
-        return [RowGroupInfo(i, metadata.row_group(i), self.physical_schema)
-                for i in row_groups]
-
-    @property
-    def metadata(self):
-        self.ensure_complete_metadata()
-        cdef FileMetaData metadata = FileMetaData()
-        metadata.init(self.parquet_file_fragment.metadata())
-        return metadata
-
-    @property
-    def num_row_groups(self):
-        """
-        Return the number of row groups viewed by this fragment (not the
-        number of row groups in the origin file).
-        """
-        self.ensure_complete_metadata()
-        return self.parquet_file_fragment.row_groups().size()
-
-    def split_by_row_group(self, Expression filter=None,
-                           Schema schema=None):
-        """
-        Split the fragment into multiple fragments.
-
-        Yield a Fragment wrapping each row group in this ParquetFileFragment.
-        Row groups whose metadata contradicts the optional filter will be
-        excluded.
-
-        Parameters
-        ----------
-        filter : Expression, default None
-            Only include the row groups which satisfy this predicate (using
-            the Parquet RowGroup statistics).
-        schema : Schema, default None
-            Schema to use when filtering row groups. Defaults to the
-            Fragment's physical schema.
-
-        Returns
-        -------
-        A list of Fragments
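-
-        Examples
-        --------
-        A minimal sketch, assuming `fragment` is a ParquetFileFragment and
-        the data has a column named "A":
-
-        >>> import pyarrow.dataset as ds
-        >>> row_group_fragments = fragment.split_by_row_group(
-        ...     filter=ds.field("A") > 0)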
-        """
-        cdef:
-            vector[shared_ptr[CFragment]] c_fragments
-            CExpression c_filter
-            shared_ptr[CFragment] c_fragment
-
-        schema = schema or self.physical_schema
-        c_filter = _bind(filter, schema)
-        with nogil:
-            c_fragments = move(GetResultValue(
-                self.parquet_file_fragment.SplitByRowGroup(move(c_filter))))
-
-        return [Fragment.wrap(c_fragment) for c_fragment in c_fragments]
-
-    def subset(self, Expression filter=None, Schema schema=None,
-               object row_group_ids=None):
-        """
-        Create a subset of the fragment (viewing a subset of the row groups).
-
-        Subset can be specified by either a filter predicate (with optional
-        schema) or by a list of row group IDs. Note that when using a filter,
-        the resulting fragment can be empty (viewing no row groups).
-
-        Parameters
-        ----------
-        filter : Expression, default None
-            Only include the row groups which satisfy this predicate (using
-            the Parquet RowGroup statistics).
-        schema : Schema, default None
-            Schema to use when filtering row groups. Defaults to the
-            Fragment's physical schema.
-        row_group_ids : list of ints
-            The row group IDs to include in the subset. Can only be specified
-            if `filter` is None.
-
-        Returns
-        -------
-        ParquetFileFragment
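-
-        Examples
-        --------
-        A minimal sketch; the row group ids are hypothetical:
-
-        >>> subset_fragment = fragment.subset(row_group_ids=[0, 2])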
-        """
-        cdef:
-            CExpression c_filter
-            vector[int] c_row_group_ids
-            shared_ptr[CFragment] c_fragment
-
-        if filter is not None and row_group_ids is not None:
-            raise ValueError(
-                "Cannot specify both 'filter' and 'row_group_ids'."
-            )
-
-        if filter is not None:
-            schema = schema or self.physical_schema
-            c_filter = _bind(filter, schema)
-            with nogil:
-                c_fragment = move(GetResultValue(
-                    self.parquet_file_fragment.SubsetWithFilter(
-                        move(c_filter))))
-        elif row_group_ids is not None:
-            c_row_group_ids = [
-                <int> row_group for row_group in sorted(set(row_group_ids))
-            ]
-            with nogil:
-                c_fragment = move(GetResultValue(
-                    self.parquet_file_fragment.SubsetWithIds(
-                        move(c_row_group_ids))))
-        else:
-            raise ValueError(
-                "Need to specify one of 'filter' or 'row_group_ids'"
-            )
-
-        return Fragment.wrap(c_fragment)
-
-
-cdef class ParquetReadOptions(_Weakrefable):
-    """
-    Parquet format specific options for reading.
-
-    Parameters
-    ----------
-    dictionary_columns : list of string, default None
-        Names of columns which should be dictionary encoded as
-        they are read.
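-
-    Examples
-    --------
-    A minimal sketch; the column name is hypothetical, and the classes are
-    assumed to be exposed through pyarrow.dataset:
-
-    >>> import pyarrow.dataset as ds
-    >>> options = ds.ParquetReadOptions(dictionary_columns=["category"])
-    >>> fmt = ds.ParquetFileFormat(read_options=options)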
-    """
-
-    cdef public:
-        set dictionary_columns
-
-    # Also see _PARQUET_READ_OPTIONS
-    def __init__(self, dictionary_columns=None):
-        self.dictionary_columns = set(dictionary_columns or set())
-
-    def equals(self, ParquetReadOptions other):
-        return self.dictionary_columns == other.dictionary_columns
-
-    def __eq__(self, other):
-        try:
-            return self.equals(other)
-        except TypeError:
-            return False
-
-    def __repr__(self):
-        return (f"<ParquetReadOptions"
-                f" dictionary_columns={self.dictionary_columns}>")
-
-
-cdef class ParquetFileWriteOptions(FileWriteOptions):
-
-    cdef:
-        CParquetFileWriteOptions* parquet_options
-        object _properties
-
-    def update(self, **kwargs):
-        arrow_fields = {
-            "use_deprecated_int96_timestamps",
-            "coerce_timestamps",
-            "allow_truncated_timestamps",
-        }
-
-        setters = set()
-        for name, value in kwargs.items():
-            if name not in self._properties:
-                raise TypeError("unexpected parquet write option: " + name)
-            self._properties[name] = value
-            if name in arrow_fields:
-                setters.add(self._set_arrow_properties)
-            else:
-                setters.add(self._set_properties)
-
-        for setter in setters:
-            setter()
-
-    def _set_properties(self):
-        cdef CParquetFileWriteOptions* opts = self.parquet_options
-
-        opts.writer_properties = _create_writer_properties(
-            use_dictionary=self._properties["use_dictionary"],
-            compression=self._properties["compression"],
-            version=self._properties["version"],
-            write_statistics=self._properties["write_statistics"],
-            data_page_size=self._properties["data_page_size"],
-            compression_level=self._properties["compression_level"],
-            use_byte_stream_split=(
-                self._properties["use_byte_stream_split"]
-            ),
-            data_page_version=self._properties["data_page_version"],
-        )
-
-    def _set_arrow_properties(self):
-        cdef CParquetFileWriteOptions* opts = self.parquet_options
-
-        opts.arrow_writer_properties = _create_arrow_writer_properties(
-            use_deprecated_int96_timestamps=(
-                self._properties["use_deprecated_int96_timestamps"]
-            ),
-            coerce_timestamps=self._properties["coerce_timestamps"],
-            allow_truncated_timestamps=(
-                self._properties["allow_truncated_timestamps"]
-            ),
-            writer_engine_version="V2",
-            use_compliant_nested_type=(
-                self._properties["use_compliant_nested_type"]
-            )
-        )
-
-    cdef void init(self, const shared_ptr[CFileWriteOptions]& sp):
-        FileWriteOptions.init(self, sp)
-        self.parquet_options = <CParquetFileWriteOptions*> sp.get()
-        self._properties = dict(
-            use_dictionary=True,
-            compression="snappy",
-            version="1.0",
-            write_statistics=None,
-            data_page_size=None,
-            compression_level=None,
-            use_byte_stream_split=False,
-            data_page_version="1.0",
-            use_deprecated_int96_timestamps=False,
-            coerce_timestamps=None,
-            allow_truncated_timestamps=False,
-            use_compliant_nested_type=False,
-        )
-        self._set_properties()
-        self._set_arrow_properties()
-
-
-cdef set _PARQUET_READ_OPTIONS = {'dictionary_columns'}
-
-
-cdef class ParquetFileFormat(FileFormat):
-
-    cdef:
-        CParquetFileFormat* parquet_format
-
-    def __init__(self, read_options=None,
-                 default_fragment_scan_options=None, **kwargs):
-        cdef:
-            shared_ptr[CParquetFileFormat] wrapped
-            CParquetFileFormatReaderOptions* options
-
-        # Read/scan options
-        read_options_args = {option: kwargs[option] for option in kwargs
-                             if option in _PARQUET_READ_OPTIONS}
-        scan_args = {option: kwargs[option] for option in kwargs
-                     if option not in _PARQUET_READ_OPTIONS}
-        if read_options and read_options_args:
-            duplicates = ', '.join(sorted(read_options_args))
-            raise ValueError(f'If `read_options` is given, '
-                             f'cannot specify {duplicates}')
-        if default_fragment_scan_options and scan_args:
-            duplicates = ', '.join(sorted(scan_args))
-            raise ValueError(f'If `default_fragment_scan_options` is given, '
-                             f'cannot specify {duplicates}')
-
-        if read_options is None:
-            read_options = ParquetReadOptions(**read_options_args)
-        elif isinstance(read_options, dict):
-            # For backwards compatibility
-            duplicates = []
-            for option, value in read_options.items():
-                if option in _PARQUET_READ_OPTIONS:
-                    read_options_args[option] = value
-                else:
-                    duplicates.append(option)
-                    scan_args[option] = value
-            if duplicates:
-                duplicates = ", ".join(duplicates)
-                warnings.warn(f'The scan options {duplicates} should be '
-                              'specified directly as keyword arguments')
-            read_options = ParquetReadOptions(**read_options_args)
-        elif not isinstance(read_options, ParquetReadOptions):
-            raise TypeError('`read_options` must be either a dictionary or an '
-                            'instance of ParquetReadOptions')
-
-        if default_fragment_scan_options is None:
-            default_fragment_scan_options = ParquetFragmentScanOptions(
-                **scan_args)
-        elif isinstance(default_fragment_scan_options, dict):
-            default_fragment_scan_options = ParquetFragmentScanOptions(
-                **default_fragment_scan_options)
-        elif not isinstance(default_fragment_scan_options,
-                            ParquetFragmentScanOptions):
-            raise TypeError('`default_fragment_scan_options` must be either a '
-                            'dictionary or an instance of '
-                            'ParquetFragmentScanOptions')
-
-        wrapped = make_shared[CParquetFileFormat]()
-        options = &(wrapped.get().reader_options)
-        if read_options.dictionary_columns is not None:
-            for column in read_options.dictionary_columns:
-                options.dict_columns.insert(tobytes(column))
-
-        self.init(<shared_ptr[CFileFormat]> wrapped)
-        self.default_fragment_scan_options = default_fragment_scan_options
-
-    cdef void init(self, const shared_ptr[CFileFormat]& sp):
-        FileFormat.init(self, sp)
-        self.parquet_format = <CParquetFileFormat*> sp.get()
-
-    @property
-    def read_options(self):
-        cdef CParquetFileFormatReaderOptions* options
-        options = &self.parquet_format.reader_options
-        return ParquetReadOptions(
-            dictionary_columns={frombytes(col)
-                                for col in options.dict_columns},
-        )
-
-    def make_write_options(self, **kwargs):
-        opts = FileFormat.make_write_options(self)
-        (<ParquetFileWriteOptions> opts).update(**kwargs)
-        return opts
-
-    cdef _set_default_fragment_scan_options(self, FragmentScanOptions options):
-        if options.type_name == 'parquet':
-            self.parquet_format.default_fragment_scan_options = options.wrapped
-        else:
-            super()._set_default_fragment_scan_options(options)
-
-    def equals(self, ParquetFileFormat other):
-        return (
-            self.read_options.equals(other.read_options) and
-            self.default_fragment_scan_options ==
-            other.default_fragment_scan_options
-        )
-
-    def __reduce__(self):
-        return ParquetFileFormat, (self.read_options,
-                                   self.default_fragment_scan_options)
-
-    def __repr__(self):
-        return f"<ParquetFileFormat read_options={self.read_options}>"
-
-    def make_fragment(self, file, filesystem=None,
-                      Expression partition_expression=None, row_groups=None):
-        cdef:
-            vector[int] c_row_groups
-
-        if partition_expression is None:
-            partition_expression = _true
-
-        if row_groups is None:
-            return super().make_fragment(file, filesystem,
-                                         partition_expression)
-
-        c_source = _make_file_source(file, filesystem)
-        c_row_groups = [<int> row_group for row_group in set(row_groups)]
-
-        c_fragment = <shared_ptr[CFragment]> GetResultValue(
-            self.parquet_format.MakeFragment(move(c_source),
-                                             partition_expression.unwrap(),
-                                             <shared_ptr[CSchema]>nullptr,
-                                             move(c_row_groups)))
-        return Fragment.wrap(move(c_fragment))
-
-
-cdef class ParquetFragmentScanOptions(FragmentScanOptions):
-    """Scan-specific options for Parquet fragments.
-
-    Parameters
-    ----------
-    use_buffered_stream : bool, default False
-        Read files through buffered input streams rather than loading entire
-        row groups at once. This may be enabled to reduce memory overhead.
-        Disabled by default.
-    buffer_size : int, default 8192
-        Size of buffered stream, if enabled. Default is 8KB.
-    pre_buffer : bool, default False
-        If enabled, pre-buffer the raw Parquet data instead of issuing one
-        read per column chunk. This can improve performance on high-latency
-        filesystems.
-    enable_parallel_column_conversion : bool, default False
-        EXPERIMENTAL: Parallelize conversion across columns. This option is
-        ignored if a scan is already parallelized across input files to avoid
-        thread contention. This option will be removed after support is added
-        for simultaneous parallelization across files and columns.
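-
-    Examples
-    --------
-    A minimal sketch enabling pre-buffering, assuming the classes are
-    exposed through pyarrow.dataset:
-
-    >>> import pyarrow.dataset as ds
-    >>> options = ds.ParquetFragmentScanOptions(pre_buffer=True)
-    >>> fmt = ds.ParquetFileFormat(default_fragment_scan_options=options)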
-    """
-
-    cdef:
-        CParquetFragmentScanOptions* parquet_options
-
-    # Avoid mistakenly creating attributes
-    __slots__ = ()
-
-    def __init__(self, bint use_buffered_stream=False,
-                 buffer_size=8192,
-                 bint pre_buffer=False,
-                 bint enable_parallel_column_conversion=False):
-        self.init(shared_ptr[CFragmentScanOptions](
-            new CParquetFragmentScanOptions()))
-        self.use_buffered_stream = use_buffered_stream
-        self.buffer_size = buffer_size
-        self.pre_buffer = pre_buffer
-        self.enable_parallel_column_conversion = \
-            enable_parallel_column_conversion
-
-    cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp):
-        FragmentScanOptions.init(self, sp)
-        self.parquet_options = <CParquetFragmentScanOptions*> sp.get()
-
-    cdef CReaderProperties* reader_properties(self):
-        return self.parquet_options.reader_properties.get()
-
-    cdef ArrowReaderProperties* arrow_reader_properties(self):
-        return self.parquet_options.arrow_reader_properties.get()
-
-    @property
-    def use_buffered_stream(self):
-        return self.reader_properties().is_buffered_stream_enabled()
-
-    @use_buffered_stream.setter
-    def use_buffered_stream(self, bint use_buffered_stream):
-        if use_buffered_stream:
-            self.reader_properties().enable_buffered_stream()
-        else:
-            self.reader_properties().disable_buffered_stream()
-
-    @property
-    def buffer_size(self):
-        return self.reader_properties().buffer_size()
-
-    @buffer_size.setter
-    def buffer_size(self, buffer_size):
-        if buffer_size <= 0:
-            raise ValueError("Buffer size must be larger than zero")
-        self.reader_properties().set_buffer_size(buffer_size)
-
-    @property
-    def pre_buffer(self):
-        return self.arrow_reader_properties().pre_buffer()
-
-    @pre_buffer.setter
-    def pre_buffer(self, bint pre_buffer):
-        self.arrow_reader_properties().set_pre_buffer(pre_buffer)
-
-    @property
-    def enable_parallel_column_conversion(self):
-        return self.parquet_options.enable_parallel_column_conversion
-
-    @enable_parallel_column_conversion.setter
-    def enable_parallel_column_conversion(
-            self, bint enable_parallel_column_conversion):
-        self.parquet_options.enable_parallel_column_conversion = \
-            enable_parallel_column_conversion
-
-    def equals(self, ParquetFragmentScanOptions other):
-        return (
-            self.use_buffered_stream == other.use_buffered_stream and
-            self.buffer_size == other.buffer_size and
-            self.pre_buffer == other.pre_buffer and
-            self.enable_parallel_column_conversion ==
-            other.enable_parallel_column_conversion)
-
-    def __reduce__(self):
-        return ParquetFragmentScanOptions, (
-            self.use_buffered_stream, self.buffer_size, self.pre_buffer,
-            self.enable_parallel_column_conversion)
-
-
-cdef class IpcFileWriteOptions(FileWriteOptions):
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__)
-
-
-cdef class IpcFileFormat(FileFormat):
-
-    def __init__(self):
-        self.init(shared_ptr[CFileFormat](new CIpcFileFormat()))
-
-    def equals(self, IpcFileFormat other):
-        return True
-
-    @property
-    def default_extname(self):
-        return "feather"
-
-    def __reduce__(self):
-        return IpcFileFormat, tuple()
-
-
-cdef class CsvFileFormat(FileFormat):
-    cdef:
-        CCsvFileFormat* csv_format
-
-    # Avoid mistakenly creating attributes
-    __slots__ = ()
-
-    def __init__(self, ParseOptions parse_options=None,
-                 default_fragment_scan_options=None,
-                 ConvertOptions convert_options=None,
-                 ReadOptions read_options=None):
-        self.init(shared_ptr[CFileFormat](new CCsvFileFormat()))
-        if parse_options is not None:
-            self.parse_options = parse_options
-        if convert_options is not None or read_options is not None:
-            if default_fragment_scan_options:
-                raise ValueError('If `default_fragment_scan_options` is '
-                                 'given, cannot specify convert_options '
-                                 'or read_options')
-            self.default_fragment_scan_options = CsvFragmentScanOptions(
-                convert_options=convert_options, read_options=read_options)
-        elif isinstance(default_fragment_scan_options, dict):
-            self.default_fragment_scan_options = CsvFragmentScanOptions(
-                **default_fragment_scan_options)
-        elif isinstance(default_fragment_scan_options, CsvFragmentScanOptions):
-            self.default_fragment_scan_options = default_fragment_scan_options
-        elif default_fragment_scan_options is not None:
-            raise TypeError('`default_fragment_scan_options` must be either '
-                            'a dictionary or an instance of '
-                            'CsvFragmentScanOptions')
-
-    cdef void init(self, const shared_ptr[CFileFormat]& sp):
-        FileFormat.init(self, sp)
-        self.csv_format = <CCsvFileFormat*> sp.get()
-
-    def make_write_options(self):
-        raise NotImplementedError("writing CSV datasets")
-
-    @property
-    def parse_options(self):
-        return ParseOptions.wrap(self.csv_format.parse_options)
-
-    @parse_options.setter
-    def parse_options(self, ParseOptions parse_options not None):
-        self.csv_format.parse_options = parse_options.options
-
-    cdef _set_default_fragment_scan_options(self, FragmentScanOptions options):
-        if options.type_name == 'csv':
-            self.csv_format.default_fragment_scan_options = options.wrapped
-        else:
-            super()._set_default_fragment_scan_options(options)
-
-    def equals(self, CsvFileFormat other):
-        return (
-            self.parse_options.equals(other.parse_options) and
-            self.default_fragment_scan_options ==
-            other.default_fragment_scan_options)
-
-    def __reduce__(self):
-        return CsvFileFormat, (self.parse_options,
-                               self.default_fragment_scan_options)
-
-    def __repr__(self):
-        return f"<CsvFileFormat parse_options={self.parse_options}>"
-
-
-cdef class CsvFragmentScanOptions(FragmentScanOptions):
-    """Scan-specific options for CSV fragments."""
-
-    cdef:
-        CCsvFragmentScanOptions* csv_options
-
-    # Avoid mistakenly creating attributes
-    __slots__ = ()
-
-    def __init__(self, ConvertOptions convert_options=None,
-                 ReadOptions read_options=None):
-        self.init(shared_ptr[CFragmentScanOptions](
-            new CCsvFragmentScanOptions()))
-        if convert_options is not None:
-            self.convert_options = convert_options
-        if read_options is not None:
-            self.read_options = read_options
-
-    cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp):
-        FragmentScanOptions.init(self, sp)
-        self.csv_options = <CCsvFragmentScanOptions*> sp.get()
-
-    @property
-    def convert_options(self):
-        return ConvertOptions.wrap(self.csv_options.convert_options)
-
-    @convert_options.setter
-    def convert_options(self, ConvertOptions convert_options not None):
-        self.csv_options.convert_options = convert_options.options
-
-    @property
-    def read_options(self):
-        return ReadOptions.wrap(self.csv_options.read_options)
-
-    @read_options.setter
-    def read_options(self, ReadOptions read_options not None):
-        self.csv_options.read_options = read_options.options
-
-    def equals(self, CsvFragmentScanOptions other):
-        return (
-            other and
-            self.convert_options.equals(other.convert_options) and
-            self.read_options.equals(other.read_options))
-
-    def __reduce__(self):
-        return CsvFragmentScanOptions, (self.convert_options,
-                                        self.read_options)
-
-
-cdef class Partitioning(_Weakrefable):
-
-    cdef:
-        shared_ptr[CPartitioning] wrapped
-        CPartitioning* partitioning
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__)
-
-    cdef init(self, const shared_ptr[CPartitioning]& sp):
-        self.wrapped = sp
-        self.partitioning = sp.get()
-
-    @staticmethod
-    cdef wrap(const shared_ptr[CPartitioning]& sp):
-        type_name = frombytes(sp.get().type_name())
-
-        classes = {
-            'schema': DirectoryPartitioning,
-            'hive': HivePartitioning,
-        }
-
-        class_ = classes.get(type_name, None)
-        if class_ is None:
-            raise TypeError(type_name)
-
-        cdef Partitioning self = class_.__new__(class_)
-        self.init(sp)
-        return self
-
-    cdef inline shared_ptr[CPartitioning] unwrap(self):
-        return self.wrapped
-
-    def parse(self, path):
-        cdef CResult[CExpression] result
-        result = self.partitioning.Parse(tobytes(path))
-        return Expression.wrap(GetResultValue(result))
-
-    @property
-    def schema(self):
-        """The arrow Schema attached to the partitioning."""
-        return pyarrow_wrap_schema(self.partitioning.schema())
-
-
-cdef class PartitioningFactory(_Weakrefable):
-
-    cdef:
-        shared_ptr[CPartitioningFactory] wrapped
-        CPartitioningFactory* factory
-
-    def __init__(self):
-        _forbid_instantiation(self.__class__)
-
-    cdef init(self, const shared_ptr[CPartitioningFactory]& sp):
-        self.wrapped = sp
-        self.factory = sp.get()
-
-    @staticmethod
-    cdef wrap(const shared_ptr[CPartitioningFactory]& sp):
-        cdef PartitioningFactory self = PartitioningFactory.__new__(
-            PartitioningFactory
-        )
-        self.init(sp)
-        return self
-
-    cdef inline shared_ptr[CPartitioningFactory] unwrap(self):
-        return self.wrapped
-
-
-cdef vector[shared_ptr[CArray]] _partitioning_dictionaries(
-        Schema schema, dictionaries) except *:
-    cdef:
-        vector[shared_ptr[CArray]] c_dictionaries
-
-    dictionaries = dictionaries or {}
-
-    for field in schema:
-        dictionary = dictionaries.get(field.name)
-
-        if (isinstance(field.type, pa.DictionaryType) and
-                dictionary is not None):
-            c_dictionaries.push_back(pyarrow_unwrap_array(dictionary))
-        else:
-            c_dictionaries.push_back(<shared_ptr[CArray]> nullptr)
-
-    return c_dictionaries
-
-
-cdef class DirectoryPartitioning(Partitioning):
-    """
-    A Partitioning based on a specified Schema.
-
-    The DirectoryPartitioning expects one segment in the file path for each
-    field in the schema (all fields are required to be present).
-    For example, given schema<year:int16, month:int8>, the path "/2009/11"
-    would be parsed to ("year"_ == 2009 and "month"_ == 11).
-
-    Parameters
-    ----------
-    schema : Schema
-        The schema that describes the partitions present in the file path.
-    dictionaries : Dict[str, Array]
-        If the type of any field of `schema` is a dictionary type, the
-        corresponding entry of `dictionaries` must be an array containing
-        every value which may be taken by the corresponding column or an
-        error will be raised in parsing.
-
-    Returns
-    -------
-    DirectoryPartitioning
-
-    Examples
-    --------
-    >>> import pyarrow as pa
-    >>> from pyarrow.dataset import DirectoryPartitioning
-    >>> partitioning = DirectoryPartitioning(
-    ...     pa.schema([("year", pa.int16()), ("month", pa.int8())]))
-    >>> print(partitioning.parse("/2009/11"))
-    ((year == 2009:int16) and (month == 11:int8))
-    """
-
-    cdef:
-        CDirectoryPartitioning* directory_partitioning
-
-    def __init__(self, Schema schema not None, dictionaries=None):
-        cdef:
-            shared_ptr[CDirectoryPartitioning] c_partitioning
-
-        c_partitioning = make_shared[CDirectoryPartitioning](
-            pyarrow_unwrap_schema(schema),
-            _partitioning_dictionaries(schema, dictionaries)
-        )
-        self.init(<shared_ptr[CPartitioning]> c_partitioning)
-
-    cdef init(self, const shared_ptr[CPartitioning]& sp):
-        Partitioning.init(self, sp)
-        self.directory_partitioning = <CDirectoryPartitioning*> sp.get()
-
-    @staticmethod
-    def discover(field_names=None, infer_dictionary=False,
-                 max_partition_dictionary_size=0,
-                 schema=None):
-        """
-        Discover a DirectoryPartitioning.
-
-        Parameters
-        ----------
-        field_names : list of str
-            The names to associate with the values from the subdirectory names.
-            If schema is given, will be populated from the schema.
-        infer_dictionary : bool, default False
-            When inferring a schema for partition fields, yield dictionary
-            encoded types instead of plain types. This can be more efficient
-            when materializing virtual columns, and Expressions parsed by the
-            finished Partitioning will include dictionaries of all unique
-            inspected values for each field.
-        max_partition_dictionary_size : int, default 0
-            Synonymous with infer_dictionary for backwards compatibility with
-            1.0: setting this to -1 or None is equivalent to passing
-            infer_dictionary=True.
-        schema : Schema, default None
-            Use this schema instead of inferring a schema from partition
-            values. Partition values will be validated against this schema
-            before accumulation into the Partitioning's dictionary.
-
-        Returns
-        -------
-        PartitioningFactory
-            To be used in the FileSystemFactoryOptions.
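-
-        Examples
-        --------
-        A minimal sketch, assuming a dataset at the hypothetical path
-        "path/to/dataset" laid out as ".../<year>/<month>/..." and
-        DirectoryPartitioning imported from pyarrow.dataset:
-
-        >>> import pyarrow.dataset as ds
-        >>> factory = DirectoryPartitioning.discover(["year", "month"])
-        >>> dataset = ds.dataset("path/to/dataset", partitioning=factory)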
-        """
-        cdef:
-            CPartitioningFactoryOptions c_options
-            vector[c_string] c_field_names
-
-        if max_partition_dictionary_size in {-1, None}:
-            infer_dictionary = True
-        elif max_partition_dictionary_size != 0:
-            raise NotImplementedError(
-                "max_partition_dictionary_size must be 0, -1, or None")
-
-        if infer_dictionary:
-            c_options.infer_dictionary = True
-
-        if schema:
-            c_options.schema = pyarrow_unwrap_schema(schema)
-            c_field_names = [tobytes(f.name) for f in schema]
-        elif not field_names:
-            raise ValueError(
-                "Neither field_names nor schema was passed; "
-                "cannot infer field_names")
-        else:
-            c_field_names = [tobytes(s) for s in field_names]
-        return PartitioningFactory.wrap(
-            CDirectoryPartitioning.MakeFactory(c_field_names, c_options))
-
-
-cdef class HivePartitioning(Partitioning):
-    """
-    A Partitioning for "/$key=$value/" nested directories as found in
-    Apache Hive.
-
-    Multi-level, directory based partitioning scheme originating from
-    Apache Hive with all data files stored in the leaf directories. Data is
-    partitioned by static values of a particular column in the schema.
-    Partition keys are represented in the form $key=$value in directory names.
-    Field order is ignored, as are missing or unrecognized field names.
-
-    For example, given schema<year:int16, month:int8, day:int8>, a possible
-    path would be "/year=2009/month=11/day=15".
-
-    Parameters
-    ----------
-    schema : Schema
-        The schema that describes the partitions present in the file path.
-    dictionaries : Dict[str, Array]
-        If the type of any field of `schema` is a dictionary type, the
-        corresponding entry of `dictionaries` must be an array containing
-        every value which may be taken by the corresponding column, or an
-        error will be raised in parsing.
-    null_fallback : str, default "__HIVE_DEFAULT_PARTITION__"
-        If any field is None, this fallback will be used as a label in the
-        directory name.
-
-    Returns
-    -------
-    HivePartitioning
-
-    Examples
-    --------
-    >>> from pyarrow.dataset import HivePartitioning
-    >>> partitioning = HivePartitioning(
-    ...     pa.schema([("year", pa.int16()), ("month", pa.int8())]))
-    >>> print(partitioning.parse("/year=2009/month=11"))
-    ((year == 2009:int16) and (month == 11:int8))
-
-    """
-
-    cdef:
-        CHivePartitioning* hive_partitioning
-
-    def __init__(self,
-                 Schema schema not None,
-                 dictionaries=None,
-                 null_fallback="__HIVE_DEFAULT_PARTITION__"):
-
-        cdef:
-            shared_ptr[CHivePartitioning] c_partitioning
-            c_string c_null_fallback = tobytes(null_fallback)
-
-        c_partitioning = make_shared[CHivePartitioning](
-            pyarrow_unwrap_schema(schema),
-            _partitioning_dictionaries(schema, dictionaries),
-            c_null_fallback
-        )
-        self.init(<shared_ptr[CPartitioning]> c_partitioning)
-
-    cdef init(self, const shared_ptr[CPartitioning]& sp):
-        Partitioning.init(self, sp)
-        self.hive_partitioning = <CHivePartitioning*> sp.get()
-
-    @staticmethod
-    def discover(infer_dictionary=False,
-                 max_partition_dictionary_size=0,
-                 null_fallback="__HIVE_DEFAULT_PARTITION__",
-                 schema=None):
-        """
-        Discover a HivePartitioning.
-
-        Parameters
-        ----------
-        infer_dictionary : bool, default False
-            When inferring a schema for partition fields, yield dictionary
-            encoded types instead of plain. This can be more efficient when
-            materializing virtual columns, and Expressions parsed by the
-            finished Partitioning will include dictionaries of all unique
-            inspected values for each field.
-        max_partition_dictionary_size : int, default 0
-            Synonymous with infer_dictionary for backwards compatibility with
-            1.0: setting this to -1 or None is equivalent to passing
-            infer_dictionary=True.
-        null_fallback : str, default "__HIVE_DEFAULT_PARTITION__"
-            When inferring a schema for partition fields, this value will be
-            replaced by null. The default is set to __HIVE_DEFAULT_PARTITION__
-            for compatibility with Spark.
-        schema : Schema, default None
-            Use this schema instead of inferring a schema from partition
-            values. Partition values will be validated against this schema
-            before accumulation into the Partitioning's dictionary.
-
-        Returns
-        -------
-        PartitioningFactory
-            To be used in the FileSystemFactoryOptions.
-        """
-        cdef:
-            CHivePartitioningFactoryOptions c_options
-
-        if max_partition_dictionary_size in {-1, None}:
-            infer_dictionary = True
-        elif max_partition_dictionary_size != 0:
-            raise NotImplementedError(
-                "max_partition_dictionary_size must be 0, -1, or None")
-
-        if infer_dictionary:
-            c_options.infer_dictionary = True
-
-        c_options.null_fallback = tobytes(null_fallback)
-
-        if schema:
-            c_options.schema = pyarrow_unwrap_schema(schema)
-
-        return PartitioningFactory.wrap(
-            CHivePartitioning.MakeFactory(c_options))
-
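-# A minimal usage sketch of the HivePartitioning API above, assuming a
-# hypothetical "data/" directory laid out as "/year=.../month=...".
-#
-#   import pyarrow as pa
-#   import pyarrow.dataset as ds
-#
-#   part = ds.HivePartitioning(
-#       pa.schema([("year", pa.int16()), ("month", pa.int8())]))
-#   print(part.parse("/year=2009/month=11"))
-#
-#   # Or discover the partition schema (dictionary-encoded) from the paths.
-#   factory = ds.HivePartitioning.discover(infer_dictionary=True)
-#   dataset = ds.dataset("data/", format="parquet", partitioning=factory)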
-
-cdef class DatasetFactory(_Weakrefable):
-    """
-    DatasetFactory is used to create a Dataset, inspect the Schema
-    of the fragments contained in it, and declare a partitioning.
-    """
-
-    cdef:
-        shared_ptr[CDatasetFactory] wrapped
-        CDatasetFactory* factory
-
-    def __init__(self, list children):
-        _forbid_instantiation(self.__class__)
-
-    cdef init(self, const shared_ptr[CDatasetFactory]& sp):
-        self.wrapped = sp
-        self.factory = sp.get()
-
-    @staticmethod
-    cdef wrap(const shared_ptr[CDatasetFactory]& sp):
-        cdef DatasetFactory self = \
-            DatasetFactory.__new__(DatasetFactory)
-        self.init(sp)
-        return self
-
-    cdef inline shared_ptr[CDatasetFactory] unwrap(self) nogil:
-        return self.wrapped
-
-    @property
-    def root_partition(self):
-        return Expression.wrap(self.factory.root_partition())
-
-    @root_partition.setter
-    def root_partition(self, Expression expr):
-        check_status(self.factory.SetRootPartition(expr.unwrap()))
-
-    def inspect_schemas(self):
-        cdef CResult[vector[shared_ptr[CSchema]]] result
-        cdef CInspectOptions options
-        with nogil:
-            result = self.factory.InspectSchemas(options)
-
-        schemas = []
-        for s in GetResultValue(result):
-            schemas.append(pyarrow_wrap_schema(s))
-        return schemas
-
-    def inspect(self):
-        """
-        Inspect all data fragments and return a common Schema.
-
-        Returns
-        -------
-        Schema
-        """
-        cdef:
-            CInspectOptions options
-            CResult[shared_ptr[CSchema]] result
-        with nogil:
-            result = self.factory.Inspect(options)
-        return pyarrow_wrap_schema(GetResultValue(result))
-
-    def finish(self, Schema schema=None):
-        """
-        Create a Dataset using the inspected schema or an explicit schema
-        (if given).
-
-        Parameters
-        ----------
-        schema : Schema, default None
-            The schema to conform the source to.  If None, the inspected
-            schema is used.
-
-        Returns
-        -------
-        Dataset
-        """
-        cdef:
-            shared_ptr[CSchema] sp_schema
-            CResult[shared_ptr[CDataset]] result
-
-        if schema is not None:
-            sp_schema = pyarrow_unwrap_schema(schema)
-            with nogil:
-                result = self.factory.FinishWithSchema(sp_schema)
-        else:
-            with nogil:
-                result = self.factory.Finish()
-
-        return Dataset.wrap(GetResultValue(result))
-
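-# A minimal sketch of the inspect()/finish() flow on a concrete factory
-# (FileSystemDatasetFactory, defined below); the local "data/" path is
-# hypothetical.
-#
-#   import pyarrow.dataset as ds
-#   from pyarrow import fs
-#
-#   factory = ds.FileSystemDatasetFactory(
-#       fs.LocalFileSystem(),
-#       fs.FileSelector("data/", recursive=True),
-#       ds.ParquetFileFormat())
-#   print(factory.inspect())    # common schema across all fragments
-#   dataset = factory.finish()  # materialize the Dataset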
-
-cdef class FileSystemFactoryOptions(_Weakrefable):
-    """
-    Influences the discovery of filesystem paths.
-
-    Parameters
-    ----------
-    partition_base_dir : str, optional
-        For the purposes of applying the partitioning, paths will be
-        stripped of the partition_base_dir. Files not matching the
-        partition_base_dir prefix will be skipped for partitioning discovery.
-        The ignored files will still be part of the Dataset, but will not
-        have partition information.
-    partitioning : Partitioning or PartitioningFactory, optional
-        Apply the Partitioning to every discovered Fragment. See Partitioning
-        or PartitioningFactory documentation.
-    exclude_invalid_files : bool, optional (default True)
-        If True, invalid files will be excluded (file format specific check).
-        This will incur IO for each file in a serial and single-threaded
-        fashion. Disabling this feature will skip the IO, but unsupported
-        files may be present in the Dataset (resulting in an error at scan
-        time).
-    selector_ignore_prefixes : list, optional
-        When discovering from a Selector (and not from an explicit file list),
-        ignore files and directories matching any of these prefixes.
-        By default this is ['.', '_'].
-    """
-
-    cdef:
-        CFileSystemFactoryOptions options
-
-    __slots__ = ()  # avoid mistakenly creating attributes
-
-    def __init__(self, partition_base_dir=None, partitioning=None,
-                 exclude_invalid_files=None,
-                 list selector_ignore_prefixes=None):
-        if isinstance(partitioning, PartitioningFactory):
-            self.partitioning_factory = partitioning
-        elif isinstance(partitioning, Partitioning):
-            self.partitioning = partitioning
-
-        if partition_base_dir is not None:
-            self.partition_base_dir = partition_base_dir
-        if exclude_invalid_files is not None:
-            self.exclude_invalid_files = exclude_invalid_files
-        if selector_ignore_prefixes is not None:
-            self.selector_ignore_prefixes = selector_ignore_prefixes
-
-    cdef inline CFileSystemFactoryOptions unwrap(self):
-        return self.options
-
-    @property
-    def partitioning(self):
-        """Partitioning to apply to discovered files.
-
-        NOTE: setting this property will overwrite partitioning_factory.
-        """
-        c_partitioning = self.options.partitioning.partitioning()
-        if c_partitioning.get() == nullptr:
-            return None
-        return Partitioning.wrap(c_partitioning)
-
-    @partitioning.setter
-    def partitioning(self, Partitioning value):
-        self.options.partitioning = (<Partitioning> value).unwrap()
-
-    @property
-    def partitioning_factory(self):
-        """PartitioningFactory to apply to discovered files and
-        discover a Partitioning.
-
-        NOTE: setting this property will overwrite partitioning.
-        """
-        c_factory = self.options.partitioning.factory()
-        if c_factory.get() == nullptr:
-            return None
-        return PartitioningFactory.wrap(c_factory)
-
-    @partitioning_factory.setter
-    def partitioning_factory(self, PartitioningFactory value):
-        self.options.partitioning = (<PartitioningFactory> value).unwrap()
-
-    @property
-    def partition_base_dir(self):
-        """
-        Base directory to strip paths before applying the partitioning.
-        """
-        return frombytes(self.options.partition_base_dir)
-
-    @partition_base_dir.setter
-    def partition_base_dir(self, value):
-        self.options.partition_base_dir = tobytes(value)
-
-    @property
-    def exclude_invalid_files(self):
-        """Whether to exclude invalid files."""
-        return self.options.exclude_invalid_files
-
-    @exclude_invalid_files.setter
-    def exclude_invalid_files(self, bint value):
-        self.options.exclude_invalid_files = value
-
-    @property
-    def selector_ignore_prefixes(self):
-        """
-        List of prefixes. Files matching one of those prefixes will be
-        ignored by the discovery process.
-        """
-        return [frombytes(p) for p in self.options.selector_ignore_prefixes]
-
-    @selector_ignore_prefixes.setter
-    def selector_ignore_prefixes(self, values):
-        self.options.selector_ignore_prefixes = [tobytes(v) for v in values]
-
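-# A minimal sketch of constructing the options above; the base directory and
-# the extra ignore prefix are hypothetical.
-#
-#   import pyarrow.dataset as ds
-#
-#   options = ds.FileSystemFactoryOptions(
-#       partition_base_dir="data/",
-#       partitioning=ds.HivePartitioning.discover(),
-#       exclude_invalid_files=False,
-#       selector_ignore_prefixes=[".", "_", "staging"])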
-
-cdef class FileSystemDatasetFactory(DatasetFactory):
-    """
-    Create a DatasetFactory from a list of paths with schema inspection.
-
-    Parameters
-    ----------
-    filesystem : pyarrow.fs.FileSystem
-        Filesystem to discover.
-    paths_or_selector : pyarrow.fs.FileSelector or list of path-likes
-        Either a FileSelector object or a list of path-like objects.
-    format : FileFormat
-        Currently only ParquetFileFormat and IpcFileFormat are supported.
-    options : FileSystemFactoryOptions, optional
-        Various flags influencing the discovery of filesystem paths.
-    """
-
-    cdef:
-        CFileSystemDatasetFactory* filesystem_factory
-
-    def __init__(self, FileSystem filesystem not None, paths_or_selector,
-                 FileFormat format not None,
-                 FileSystemFactoryOptions options=None):
-        cdef:
-            vector[c_string] paths
-            CFileSelector c_selector
-            CResult[shared_ptr[CDatasetFactory]] result
-            shared_ptr[CFileSystem] c_filesystem
-            shared_ptr[CFileFormat] c_format
-            CFileSystemFactoryOptions c_options
-
-        options = options or FileSystemFactoryOptions()
-        c_options = options.unwrap()
-        c_filesystem = filesystem.unwrap()
-        c_format = format.unwrap()
-
-        if isinstance(paths_or_selector, FileSelector):
-            with nogil:
-                c_selector = (<FileSelector> paths_or_selector).selector
-                result = CFileSystemDatasetFactory.MakeFromSelector(
-                    c_filesystem,
-                    c_selector,
-                    c_format,
-                    c_options
-                )
-        elif isinstance(paths_or_selector, (list, tuple)):
-            paths = [tobytes(s) for s in paths_or_selector]
-            with nogil:
-                result = CFileSystemDatasetFactory.MakeFromPaths(
-                    c_filesystem,
-                    paths,
-                    c_format,
-                    c_options
-                )
-        else:
-            raise TypeError('Must pass either paths or a FileSelector, but '
-                            'passed {}'.format(type(paths_or_selector)))
-
-        self.init(GetResultValue(result))
-
-    cdef init(self, shared_ptr[CDatasetFactory]& sp):
-        DatasetFactory.init(self, sp)
-        self.filesystem_factory = <CFileSystemDatasetFactory*> sp.get()
-
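-# A minimal sketch of the same factory built from an explicit list of paths
-# instead of a FileSelector; the paths are hypothetical.
-#
-#   import pyarrow.dataset as ds
-#   from pyarrow import fs
-#
-#   options = ds.FileSystemFactoryOptions(partition_base_dir="data/")
-#   factory = ds.FileSystemDatasetFactory(
-#       fs.LocalFileSystem(),
-#       ["data/part-0.parquet", "data/part-1.parquet"],
-#       ds.ParquetFileFormat(),
-#       options)
-#   dataset = factory.finish()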
-
-cdef class UnionDatasetFactory(DatasetFactory):
-    """
-    Provides a way to inspect/discover a Dataset's expected schema before
-    materialization.
-
-    Parameters
-    ----------
-    factories : list of DatasetFactory
-    """
-
-    cdef:
-        CUnionDatasetFactory* union_factory
-
-    def __init__(self, list factories):
-        cdef:
-            DatasetFactory factory
-            vector[shared_ptr[CDatasetFactory]] c_factories
-        for factory in factories:
-            c_factories.push_back(factory.unwrap())
-        self.init(GetResultValue(CUnionDatasetFactory.Make(c_factories)))
-
-    cdef init(self, const shared_ptr[CDatasetFactory]& sp):
-        DatasetFactory.init(self, sp)
-        self.union_factory = <CUnionDatasetFactory*> sp.get()
-
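-# A minimal sketch of combining two child factories so a unified schema can
-# be inspected before materialization; the "data/a" and "data/b" paths are
-# hypothetical.
-#
-#   import pyarrow.dataset as ds
-#   from pyarrow import fs
-#
-#   local = fs.LocalFileSystem()
-#   fmt = ds.ParquetFileFormat()
-#   child_a = ds.FileSystemDatasetFactory(
-#       local, fs.FileSelector("data/a", recursive=True), fmt)
-#   child_b = ds.FileSystemDatasetFactory(
-#       local, fs.FileSelector("data/b", recursive=True), fmt)
-#   union = ds.UnionDatasetFactory([child_a, child_b])
-#   dataset = union.finish(schema=union.inspect())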
-
-cdef class ParquetFactoryOptions(_Weakrefable):
-    """
-    Influences the discovery of a parquet dataset.
-
-    Parameters
-    ----------
-    partition_base_dir : str, optional
-        For the purposes of applying the partitioning, paths will be
-        stripped of the partition_base_dir. Files not matching the
-        partition_base_dir prefix will be skipped for partitioning discovery.
-        The ignored files will still be part of the Dataset, but will not
-        have partition information.
-    partitioning : Partitioning, PartitioningFactory, optional
-        The partitioning scheme applied to fragments, see ``Partitioning``.
-    validate_column_chunk_paths : bool, default False
-        Assert that all ColumnChunk paths are consistent. The parquet spec
-        allows for ColumnChunk data to be stored in multiple files, but
-        ParquetDatasetFactory supports only a single file with all ColumnChunk
-        data. If this flag is set, construction of a ParquetDatasetFactory will
-        raise an error if ColumnChunk data is not resident in a single file.
-    """
-
-    cdef:
-        CParquetFactoryOptions options
-
-    __slots__ = ()  # avoid mistakenly creating attributes
-
-    def __init__(self, partition_base_dir=None, partitioning=None,
-                 validate_column_chunk_paths=False):
-        if isinstance(partitioning, PartitioningFactory):
-            self.partitioning_factory = partitioning
-        elif isinstance(partitioning, Partitioning):
-            self.partitioning = partitioning
-
-        if partition_base_dir is not None:
-            self.partition_base_dir = partition_base_dir
-
-        self.options.validate_column_chunk_paths = validate_column_chunk_paths
-
-    cdef inline CParquetFactoryOptions unwrap(self):
-        return self.options
-
-    @property
-    def partitioning(self):
-        """Partitioning to apply to discovered files.
-
-        NOTE: setting this property will overwrite partitioning_factory.
-        """
-        c_partitioning = self.options.partitioning.partitioning()
-        if c_partitioning.get() == nullptr:
... 78503 lines suppressed ...