You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/05/13 19:44:55 UTC

[4/4] arrow git commit: ARROW-819: Public Cython and C++ API in the style of lxml, arrow::py::import_pyarrow method

ARROW-819: Public Cython and C++ API in the style of lxml, arrow::py::import_pyarrow method

I have been looking at lxml's approach to creating both a public Cython API and a public C++ API:

https://github.com/lxml/lxml

While this may seem like a somewhat radical reorganization of the code, putting all of the main symbols in a single Cython extension makes generating a C++ API for them significantly simpler. By using `.pxi` files we can break the codebase into pieces as small as we like (as long as there are no circular dependencies). As a convenient side effect, the build times are shorter.

Author: Wes McKinney <we...@twosigma.com>

Closes #680 from wesm/ARROW-819 and squashes the following commits:

9e6ee246 [Wes McKinney] Fix up optional extensions
cff757de [Wes McKinney] Expose pyarrow C API in arrow/python/pyarrow.h
b39d19cd [Wes McKinney] Fix test suite. Move _config into lib
ff1b5e51 [Wes McKinney] Rename things a bit
d4a83912 [Wes McKinney] Reorganize Cython code in the style of lxml to make declaring a public C API easier


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/9e875a68
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/9e875a68
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/9e875a68

Branch: refs/heads/master
Commit: 9e875a6843b7bd155f7e10d011f5e8d25a47494c
Parents: 95ee96b
Author: Wes McKinney <we...@twosigma.com>
Authored: Sat May 13 15:44:43 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sat May 13 15:44:43 2017 -0400

----------------------------------------------------------------------
 cpp/CMakeLists.txt                   |    4 +-
 cpp/src/arrow/python/CMakeLists.txt  |    2 +
 cpp/src/arrow/python/pyarrow.cc      |   75 ++
 cpp/src/arrow/python/pyarrow.h       |   55 +
 cpp/src/arrow/python/pyarrow_api.h   |  143 +++
 python/CMakeLists.txt                |    7 +-
 python/pyarrow/__init__.pxd          |   34 +
 python/pyarrow/__init__.py           |  112 +-
 python/pyarrow/_array.pxd            |  247 -----
 python/pyarrow/_array.pyx            | 1631 -----------------------------
 python/pyarrow/_error.pxd            |   20 -
 python/pyarrow/_error.pyx            |   70 --
 python/pyarrow/_io.pxd               |   50 -
 python/pyarrow/_io.pyx               | 1274 ----------------------
 python/pyarrow/_jemalloc.pyx         |    2 +-
 python/pyarrow/_memory.pxd           |   30 -
 python/pyarrow/_memory.pyx           |   58 -
 python/pyarrow/_parquet.pyx          |   22 +-
 python/pyarrow/_table.pxd            |   62 --
 python/pyarrow/_table.pyx            |  926 ----------------
 python/pyarrow/array.pxi             | 1549 +++++++++++++++++++++++++++
 python/pyarrow/error.pxi             |   70 ++
 python/pyarrow/feather.py            |    6 +-
 python/pyarrow/filesystem.py         |   14 +-
 python/pyarrow/formatting.py         |    4 +-
 python/pyarrow/includes/libarrow.pxd |   61 +-
 python/pyarrow/includes/pyarrow.pxd  |   75 --
 python/pyarrow/io.pxi                | 1253 ++++++++++++++++++++++
 python/pyarrow/ipc.py                |   10 +-
 python/pyarrow/lib.pxd               |  337 ++++++
 python/pyarrow/lib.pyx               |   88 ++
 python/pyarrow/memory.pxi            |   55 +
 python/pyarrow/parquet.py            |   13 +-
 python/pyarrow/public-api.pxi        |  107 ++
 python/pyarrow/table.pxi             |  884 ++++++++++++++++
 python/pyarrow/tests/test_feather.py |    2 +-
 python/setup.py                      |    9 +-
 37 files changed, 4809 insertions(+), 4552 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2146379..6b2ceec 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -852,7 +852,8 @@ if (UNIX)
     )
 
   FOREACH(item ${LINT_FILES})
-    IF(NOT (item MATCHES "_generated.h"))
+    IF(NOT ((item MATCHES "_generated.h") OR
+            (item MATCHES "pyarrow_api.h")))
       LIST(APPEND FILTERED_LINT_FILES ${item})
     ENDIF()
   ENDFOREACH(item ${LINT_FILES})
@@ -878,6 +879,7 @@ if (${CLANG_FORMAT_FOUND})
     `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h |
     sed -e '/_generated/g' |
     sed -e '/windows_compatibility.h/g' |
+    sed -e '/pyarrow_api.h/g' |
     sed -e '/config.h/g' |   # python/config.h
     sed -e '/platform.h/g'`  # python/platform.h
     )

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/cpp/src/arrow/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
index c5cbc50..3085229 100644
--- a/cpp/src/arrow/python/CMakeLists.txt
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -50,6 +50,7 @@ set(ARROW_PYTHON_SRCS
   io.cc
   numpy_convert.cc
   pandas_convert.cc
+  pyarrow.cc
 )
 
 set(ARROW_PYTHON_SHARED_LINK_LIBS
@@ -90,6 +91,7 @@ install(FILES
   numpy_interop.h
   pandas_convert.h
   platform.h
+  pyarrow.h
   type_traits.h
   DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/python")
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/cpp/src/arrow/python/pyarrow.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pyarrow.cc b/cpp/src/arrow/python/pyarrow.cc
new file mode 100644
index 0000000..56c0381
--- /dev/null
+++ b/cpp/src/arrow/python/pyarrow.cc
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/pyarrow.h"
+
+#include <memory>
+
+#include "arrow/array.h"
+#include "arrow/table.h"
+#include "arrow/tensor.h"
+#include "arrow/type.h"
+
+namespace {
+#include "arrow/python/pyarrow_api.h"
+}
+
+namespace arrow {
+namespace py {
+
+int import_pyarrow() {
+  return ::import_pyarrow__lib();
+}
+
+PyObject* wrap_buffer(const std::shared_ptr<Buffer>& buffer) {
+  return ::pyarrow_wrap_buffer(buffer);
+}
+
+PyObject* wrap_data_type(const std::shared_ptr<DataType>& type) {
+  return ::pyarrow_wrap_data_type(type);
+}
+
+PyObject* wrap_field(const std::shared_ptr<Field>& field) {
+  return ::pyarrow_wrap_field(field);
+}
+
+PyObject* wrap_schema(const std::shared_ptr<Schema>& schema) {
+  return ::pyarrow_wrap_schema(schema);
+}
+
+PyObject* wrap_array(const std::shared_ptr<Array>& array) {
+  return ::pyarrow_wrap_array(array);
+}
+
+PyObject* wrap_tensor(const std::shared_ptr<Tensor>& tensor) {
+  return ::pyarrow_wrap_tensor(tensor);
+}
+
+PyObject* wrap_column(const std::shared_ptr<Column>& column) {
+  return ::pyarrow_wrap_column(column);
+}
+
+PyObject* wrap_table(const std::shared_ptr<Table>& table) {
+  return ::pyarrow_wrap_table(table);
+}
+
+PyObject* wrap_record_batch(const std::shared_ptr<RecordBatch>& batch) {
+  return ::pyarrow_wrap_batch(batch);
+}
+
+}  // namespace py
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/cpp/src/arrow/python/pyarrow.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pyarrow.h b/cpp/src/arrow/python/pyarrow.h
new file mode 100644
index 0000000..7c618ce
--- /dev/null
+++ b/cpp/src/arrow/python/pyarrow.h
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_PYTHON_PYARROW_H
+#define ARROW_PYTHON_PYARROW_H
+
+#include "arrow/python/platform.h"
+
+#include <memory>
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class Buffer;
+class Column;
+class DataType;
+class Field;
+class RecordBatch;
+class Schema;
+class Table;
+class Tensor;
+
+namespace py {
+
+ARROW_EXPORT int import_pyarrow();
+ARROW_EXPORT PyObject* wrap_buffer(const std::shared_ptr<Buffer>& buffer);
+ARROW_EXPORT PyObject* wrap_data_type(const std::shared_ptr<DataType>& type);
+ARROW_EXPORT PyObject* wrap_field(const std::shared_ptr<Field>& field);
+ARROW_EXPORT PyObject* wrap_schema(const std::shared_ptr<Schema>& schema);
+ARROW_EXPORT PyObject* wrap_array(const std::shared_ptr<Array>& array);
+ARROW_EXPORT PyObject* wrap_tensor(const std::shared_ptr<Tensor>& tensor);
+ARROW_EXPORT PyObject* wrap_column(const std::shared_ptr<Column>& column);
+ARROW_EXPORT PyObject* wrap_table(const std::shared_ptr<Table>& table);
+ARROW_EXPORT PyObject* wrap_record_batch(const std::shared_ptr<RecordBatch>& batch);
+
+}  // namespace py
+}  // namespace arrow
+
+#endif  // ARROW_PYTHON_PYARROW_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/cpp/src/arrow/python/pyarrow_api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pyarrow_api.h b/cpp/src/arrow/python/pyarrow_api.h
new file mode 100644
index 0000000..7b70844
--- /dev/null
+++ b/cpp/src/arrow/python/pyarrow_api.h
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// DO NOT EDIT THIS FILE. Update from pyarrow/lib_api.h after pyarrow build
+
+/* Generated by Cython 0.25.2 */
+
+#ifndef __PYX_HAVE_API__pyarrow__lib
+#define __PYX_HAVE_API__pyarrow__lib
+#include "Python.h"
+
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer)(std::shared_ptr< arrow::Buffer>  const &) = 0;
+#define pyarrow_wrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type)(std::shared_ptr< arrow::DataType>  const &) = 0;
+#define pyarrow_wrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field)(std::shared_ptr< arrow::Field>  const &) = 0;
+#define pyarrow_wrap_field __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema)(std::shared_ptr< arrow::Schema>  const &) = 0;
+#define pyarrow_wrap_schema __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array)(std::shared_ptr< arrow::Array>  const &) = 0;
+#define pyarrow_wrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor)(std::shared_ptr< arrow::Tensor>  const &) = 0;
+#define pyarrow_wrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column)(std::shared_ptr< arrow::Column>  const &) = 0;
+#define pyarrow_wrap_column __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table)(std::shared_ptr< arrow::Table>  const &) = 0;
+#define pyarrow_wrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch)(std::shared_ptr< arrow::RecordBatch>  const &) = 0;
+#define pyarrow_wrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch
+#if !defined(__Pyx_PyIdentifier_FromString)
+#if PY_MAJOR_VERSION < 3
+  #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
+#else
+  #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
+#endif
+#endif
+
+#ifndef __PYX_HAVE_RT_ImportModule
+#define __PYX_HAVE_RT_ImportModule
+static PyObject *__Pyx_ImportModule(const char *name) {
+    PyObject *py_name = 0;
+    PyObject *py_module = 0;
+    py_name = __Pyx_PyIdentifier_FromString(name);
+    if (!py_name)
+        goto bad;
+    py_module = PyImport_Import(py_name);
+    Py_DECREF(py_name);
+    return py_module;
+bad:
+    Py_XDECREF(py_name);
+    return 0;
+}
+#endif
+
+#ifndef __PYX_HAVE_RT_ImportFunction
+#define __PYX_HAVE_RT_ImportFunction
+static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
+    PyObject *d = 0;
+    PyObject *cobj = 0;
+    union {
+        void (*fp)(void);
+        void *p;
+    } tmp;
+    d = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
+    if (!d)
+        goto bad;
+    cobj = PyDict_GetItemString(d, funcname);
+    if (!cobj) {
+        PyErr_Format(PyExc_ImportError,
+            "%.200s does not export expected C function %.200s",
+                PyModule_GetName(module), funcname);
+        goto bad;
+    }
+#if PY_VERSION_HEX >= 0x02070000
+    if (!PyCapsule_IsValid(cobj, sig)) {
+        PyErr_Format(PyExc_TypeError,
+            "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+             PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
+        goto bad;
+    }
+    tmp.p = PyCapsule_GetPointer(cobj, sig);
+#else
+    {const char *desc, *s1, *s2;
+    desc = (const char *)PyCObject_GetDesc(cobj);
+    if (!desc)
+        goto bad;
+    s1 = desc; s2 = sig;
+    while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
+    if (*s1 != *s2) {
+        PyErr_Format(PyExc_TypeError,
+            "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+             PyModule_GetName(module), funcname, sig, desc);
+        goto bad;
+    }
+    tmp.p = PyCObject_AsVoidPtr(cobj);}
+#endif
+    *f = tmp.fp;
+    if (!(*f))
+        goto bad;
+    Py_DECREF(d);
+    return 0;
+bad:
+    Py_XDECREF(d);
+    return -1;
+}
+#endif
+
+
+static int import_pyarrow__lib(void) {
+  PyObject *module = 0;
+  module = __Pyx_ImportModule("pyarrow.lib");
+  if (!module) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer, "PyObject *(std::shared_ptr< arrow::Buffer>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type, "PyObject *(std::shared_ptr< arrow::DataType>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field, "PyObject *(std::shared_ptr< arrow::Field>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema, "PyObject *(std::shared_ptr< arrow::Schema>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array, "PyObject *(std::shared_ptr< arrow::Array>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor, "PyObject *(std::shared_ptr< arrow::Tensor>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_column", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column, "PyObject *(std::shared_ptr< arrow::Column>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table, "PyObject *(std::shared_ptr< arrow::Table>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch, "PyObject *(std::shared_ptr< arrow::RecordBatch>  const &)") < 0) goto bad;
+  Py_DECREF(module); module = 0;
+  return 0;
+  bad:
+  Py_XDECREF(module);
+  return -1;
+}
+
+#endif /* !__PYX_HAVE_API__pyarrow__lib */

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index d828710..123dd5d 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -265,12 +265,7 @@ if (UNIX)
 endif()
 
 set(CYTHON_EXTENSIONS
-  _array
-  _config
-  _error
-  _io
-  _memory
-  _table
+  lib
 )
 
 set(LINK_LIBS

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/python/pyarrow/__init__.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.pxd b/python/pyarrow/__init__.pxd
new file mode 100644
index 0000000..4f43455
--- /dev/null
+++ b/python/pyarrow/__init__.pxd
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from libcpp.memory cimport shared_ptr
+from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn, CDataType,
+                                        CField, CRecordBatch, CSchema,
+                                        CTable, CTensor)
+
+
+cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
+    cdef int import_pyarrow() except -1
+    cdef object wrap_buffer(const shared_ptr[CBuffer]& buffer)
+    cdef object wrap_data_type(const shared_ptr[CDataType]& type)
+    cdef object wrap_field(const shared_ptr[CField]& field)
+    cdef object wrap_schema(const shared_ptr[CSchema]& schema)
+    cdef object wrap_array(const shared_ptr[CArray]& sp_array)
+    cdef object wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
+    cdef object wrap_column(const shared_ptr[CColumn]& ccolumn)
+    cdef object wrap_table(const shared_ptr[CTable]& ctable)
+    cdef object wrap_batch "arrow::py::wrap_record_batch"(const shared_ptr[CRecordBatch]& cbatch)

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 4d8da9f..7d79811 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -25,53 +25,51 @@ except DistributionNotFound:
    pass
 
 
-import pyarrow._config
-from pyarrow._config import cpu_count, set_cpu_count
+from pyarrow.lib import cpu_count, set_cpu_count
+from pyarrow.lib import (null, bool_,
+                         int8, int16, int32, int64,
+                         uint8, uint16, uint32, uint64,
+                         time32, time64, timestamp, date32, date64,
+                         float16, float32, float64,
+                         binary, string, decimal,
+                         list_, struct, dictionary, field,
+                         DataType,
+                         DecimalType,
+                         DictionaryType,
+                         FixedSizeBinaryType,
+                         TimestampType,
+                         Time32Type,
+                         Time64Type,
+                         Field,
+                         Schema,
+                         schema,
+                         Array, Tensor,
+                         array,
+                         from_numpy_dtype,
+                         NullArray,
+                         NumericArray, IntegerArray, FloatingPointArray,
+                         BooleanArray,
+                         Int8Array, UInt8Array,
+                         Int16Array, UInt16Array,
+                         Int32Array, UInt32Array,
+                         Int64Array, UInt64Array,
+                         ListArray,
+                         BinaryArray, StringArray,
+                         FixedSizeBinaryArray,
+                         DictionaryArray,
+                         Date32Array, Date64Array,
+                         TimestampArray, Time32Array, Time64Array,
+                         DecimalArray,
+                         ArrayValue, Scalar, NA, NAType,
+                         BooleanValue,
+                         Int8Value, Int16Value, Int32Value, Int64Value,
+                         UInt8Value, UInt16Value, UInt32Value, UInt64Value,
+                         FloatValue, DoubleValue, ListValue,
+                         BinaryValue, StringValue, FixedSizeBinaryValue,
+                         DecimalValue,
+                         Date32Value, Date64Value, TimestampValue)
 
-from pyarrow._array import (null, bool_,
-                            int8, int16, int32, int64,
-                            uint8, uint16, uint32, uint64,
-                            time32, time64, timestamp, date32, date64,
-                            float16, float32, float64,
-                            binary, string, decimal,
-                            list_, struct, dictionary, field,
-                            DataType,
-                            DecimalType,
-                            DictionaryType,
-                            FixedSizeBinaryType,
-                            TimestampType,
-                            Time32Type,
-                            Time64Type,
-                            Field,
-                            Schema,
-                            schema,
-                            Array, Tensor,
-                            array,
-                            from_numpy_dtype,
-                            NullArray,
-                            NumericArray, IntegerArray, FloatingPointArray,
-                            BooleanArray,
-                            Int8Array, UInt8Array,
-                            Int16Array, UInt16Array,
-                            Int32Array, UInt32Array,
-                            Int64Array, UInt64Array,
-                            ListArray,
-                            BinaryArray, StringArray,
-                            FixedSizeBinaryArray,
-                            DictionaryArray,
-                            Date32Array, Date64Array,
-                            TimestampArray, Time32Array, Time64Array,
-                            DecimalArray,
-                            ArrayValue, Scalar, NA, NAType,
-                            BooleanValue,
-                            Int8Value, Int16Value, Int32Value, Int64Value,
-                            UInt8Value, UInt16Value, UInt32Value, UInt64Value,
-                            FloatValue, DoubleValue, ListValue,
-                            BinaryValue, StringValue, FixedSizeBinaryValue,
-                            DecimalValue,
-                            Date32Value, Date64Value, TimestampValue)
-
-from pyarrow._io import (HdfsFile, NativeFile, PythonFile,
+from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
                          Buffer, BufferReader, InMemoryOutputStream,
                          OSFile, MemoryMappedFile, memory_map,
                          frombuffer, read_tensor, write_tensor,
@@ -79,17 +77,17 @@ from pyarrow._io import (HdfsFile, NativeFile, PythonFile,
                          get_record_batch_size, get_tensor_size,
                          have_libhdfs, have_libhdfs3)
 
-from pyarrow._memory import (MemoryPool, total_allocated_bytes,
-                             set_memory_pool, default_memory_pool)
-from pyarrow._table import (ChunkedArray, Column, RecordBatch, Table,
-                            concat_tables)
-from pyarrow._error import (ArrowException,
-                            ArrowKeyError,
-                            ArrowInvalid,
-                            ArrowIOError,
-                            ArrowMemoryError,
-                            ArrowNotImplementedError,
-                            ArrowTypeError)
+from pyarrow.lib import (MemoryPool, total_allocated_bytes,
+                         set_memory_pool, default_memory_pool)
+from pyarrow.lib import (ChunkedArray, Column, RecordBatch, Table,
+                         concat_tables)
+from pyarrow.lib import (ArrowException,
+                         ArrowKeyError,
+                         ArrowInvalid,
+                         ArrowIOError,
+                         ArrowMemoryError,
+                         ArrowNotImplementedError,
+                         ArrowTypeError)
 
 
 def jemalloc_memory_pool():

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/python/pyarrow/_array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/_array.pxd b/python/pyarrow/_array.pxd
deleted file mode 100644
index 464de31..0000000
--- a/python/pyarrow/_array.pxd
+++ /dev/null
@@ -1,247 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport *
-
-from cpython cimport PyObject
-
-cdef extern from "Python.h":
-    int PySlice_Check(object)
-
-
-cdef class DataType:
-    cdef:
-        shared_ptr[CDataType] sp_type
-        CDataType* type
-
-    cdef void init(self, const shared_ptr[CDataType]& type)
-
-
-cdef class DictionaryType(DataType):
-    cdef:
-        const CDictionaryType* dict_type
-
-
-cdef class TimestampType(DataType):
-    cdef:
-        const CTimestampType* ts_type
-
-
-cdef class Time32Type(DataType):
-    cdef:
-        const CTime32Type* time_type
-
-
-cdef class Time64Type(DataType):
-    cdef:
-        const CTime64Type* time_type
-
-
-cdef class FixedSizeBinaryType(DataType):
-    cdef:
-        const CFixedSizeBinaryType* fixed_size_binary_type
-
-
-cdef class DecimalType(FixedSizeBinaryType):
-    cdef:
-        const CDecimalType* decimal_type
-
-
-cdef class Field:
-    cdef:
-        shared_ptr[CField] sp_field
-        CField* field
-
-    cdef readonly:
-        DataType type
-
-    cdef init(self, const shared_ptr[CField]& field)
-
-
-cdef class Schema:
-    cdef:
-        shared_ptr[CSchema] sp_schema
-        CSchema* schema
-
-    cdef init(self, const vector[shared_ptr[CField]]& fields)
-    cdef init_schema(self, const shared_ptr[CSchema]& schema)
-
-
-cdef class Scalar:
-    cdef readonly:
-        DataType type
-
-
-cdef class NAType(Scalar):
-    pass
-
-
-cdef class ArrayValue(Scalar):
-    cdef:
-        shared_ptr[CArray] sp_array
-        int64_t index
-
-    cdef void init(self, DataType type,
-                   const shared_ptr[CArray]& sp_array, int64_t index)
-
-    cdef void _set_array(self, const shared_ptr[CArray]& sp_array)
-
-
-cdef class Int8Value(ArrayValue):
-    pass
-
-
-cdef class Int64Value(ArrayValue):
-    pass
-
-
-cdef class ListValue(ArrayValue):
-    cdef readonly:
-        DataType value_type
-
-    cdef:
-        CListArray* ap
-
-    cdef getitem(self, int64_t i)
-
-
-cdef class StringValue(ArrayValue):
-    pass
-
-
-cdef class FixedSizeBinaryValue(ArrayValue):
-    pass
-
-
-cdef class Array:
-    cdef:
-        shared_ptr[CArray] sp_array
-        CArray* ap
-
-    cdef readonly:
-        DataType type
-
-    cdef init(self, const shared_ptr[CArray]& sp_array)
-    cdef getitem(self, int64_t i)
-
-
-cdef class Tensor:
-    cdef:
-        shared_ptr[CTensor] sp_tensor
-        CTensor* tp
-
-    cdef readonly:
-        DataType type
-
-    cdef init(self, const shared_ptr[CTensor]& sp_tensor)
-
-
-cdef class NullArray(Array):
-    pass
-
-
-cdef class BooleanArray(Array):
-    pass
-
-
-cdef class NumericArray(Array):
-    pass
-
-
-cdef class IntegerArray(NumericArray):
-    pass
-
-
-cdef class FloatingPointArray(NumericArray):
-    pass
-
-
-cdef class Int8Array(IntegerArray):
-    pass
-
-
-cdef class UInt8Array(IntegerArray):
-    pass
-
-
-cdef class Int16Array(IntegerArray):
-    pass
-
-
-cdef class UInt16Array(IntegerArray):
-    pass
-
-
-cdef class Int32Array(IntegerArray):
-    pass
-
-
-cdef class UInt32Array(IntegerArray):
-    pass
-
-
-cdef class Int64Array(IntegerArray):
-    pass
-
-
-cdef class UInt64Array(IntegerArray):
-    pass
-
-
-cdef class FloatArray(FloatingPointArray):
-    pass
-
-
-cdef class DoubleArray(FloatingPointArray):
-    pass
-
-
-cdef class FixedSizeBinaryArray(Array):
-    pass
-
-
-cdef class DecimalArray(FixedSizeBinaryArray):
-    pass
-
-
-cdef class ListArray(Array):
-    pass
-
-
-cdef class StringArray(Array):
-    pass
-
-
-cdef class BinaryArray(Array):
-    pass
-
-
-cdef class DictionaryArray(Array):
-    cdef:
-        object _indices, _dictionary
-
-
-cdef wrap_array_output(PyObject* output)
-cdef DataType box_data_type(const shared_ptr[CDataType]& type)
-cdef Field box_field(const shared_ptr[CField]& field)
-cdef Schema box_schema(const shared_ptr[CSchema]& schema)
-cdef object box_array(const shared_ptr[CArray]& sp_array)
-cdef object box_tensor(const shared_ptr[CTensor]& sp_tensor)
-cdef object box_scalar(DataType type,
-                       const shared_ptr[CArray]& sp_array,
-                       int64_t index)

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/python/pyarrow/_array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_array.pyx b/python/pyarrow/_array.pyx
deleted file mode 100644
index f01cff6..0000000
--- a/python/pyarrow/_array.pyx
+++ /dev/null
@@ -1,1631 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from cython.operator cimport dereference as deref
-from pyarrow.includes.libarrow cimport *
-from pyarrow.includes.common cimport PyObject_to_object
-cimport pyarrow.includes.pyarrow as pyarrow
-from pyarrow._error cimport check_status
-from pyarrow._memory cimport MemoryPool, maybe_unbox_memory_pool
-cimport cpython as cp
-
-
-import datetime
-import decimal as _pydecimal
-import numpy as np
-import six
-import pyarrow._config
-from pyarrow.compat import frombytes, tobytes, PandasSeries, Categorical
-
-
-cdef _pandas():
-    import pandas as pd
-    return pd
-
-
-# These are imprecise because the type (in pandas 0.x) depends on the presence
-# of nulls
-_pandas_type_map = {
-    _Type_NA: np.float64,  # NaNs
-    _Type_BOOL: np.bool_,
-    _Type_INT8: np.int8,
-    _Type_INT16: np.int16,
-    _Type_INT32: np.int32,
-    _Type_INT64: np.int64,
-    _Type_UINT8: np.uint8,
-    _Type_UINT16: np.uint16,
-    _Type_UINT32: np.uint32,
-    _Type_UINT64: np.uint64,
-    _Type_HALF_FLOAT: np.float16,
-    _Type_FLOAT: np.float32,
-    _Type_DOUBLE: np.float64,
-    _Type_DATE32: np.dtype('datetime64[ns]'),
-    _Type_DATE64: np.dtype('datetime64[ns]'),
-    _Type_TIMESTAMP: np.dtype('datetime64[ns]'),
-    _Type_BINARY: np.object_,
-    _Type_FIXED_SIZE_BINARY: np.object_,
-    _Type_STRING: np.object_,
-    _Type_LIST: np.object_
-}
-
-cdef class DataType:
-
-    def __cinit__(self):
-        pass
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        self.sp_type = type
-        self.type = type.get()
-
-    def __str__(self):
-        return frombytes(self.type.ToString())
-
-    def __repr__(self):
-        return '{0.__class__.__name__}({0})'.format(self)
-
-    def __richcmp__(DataType self, DataType other, int op):
-        if op == cp.Py_EQ:
-            return self.type.Equals(deref(other.type))
-        elif op == cp.Py_NE:
-            return not self.type.Equals(deref(other.type))
-        else:
-            raise TypeError('Invalid comparison')
-
-    def to_pandas_dtype(self):
-        """
-        Return the NumPy dtype that would be used for storing this
-        """
-        cdef Type type_id = self.type.id()
-        if type_id in _pandas_type_map:
-            return _pandas_type_map[type_id]
-        else:
-            raise NotImplementedError(str(self))
-
-
-cdef class DictionaryType(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.dict_type = <const CDictionaryType*> type.get()
-
-
-cdef class TimestampType(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.ts_type = <const CTimestampType*> type.get()
-
-    property unit:
-
-        def __get__(self):
-            return timeunit_to_string(self.ts_type.unit())
-
-    property tz:
-
-        def __get__(self):
-            if self.ts_type.timezone().size() > 0:
-                return frombytes(self.ts_type.timezone())
-            else:
-                return None
-
-
-cdef class Time32Type(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.time_type = <const CTime32Type*> type.get()
-
-    property unit:
-
-        def __get__(self):
-            return timeunit_to_string(self.time_type.unit())
-
-
-cdef class Time64Type(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.time_type = <const CTime64Type*> type.get()
-
-    property unit:
-
-        def __get__(self):
-            return timeunit_to_string(self.time_type.unit())
-
-
-cdef class FixedSizeBinaryType(DataType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.fixed_size_binary_type = (
-            <const CFixedSizeBinaryType*> type.get())
-
-    property byte_width:
-
-        def __get__(self):
-            return self.fixed_size_binary_type.byte_width()
-
-
-cdef class DecimalType(FixedSizeBinaryType):
-
-    cdef void init(self, const shared_ptr[CDataType]& type):
-        DataType.init(self, type)
-        self.decimal_type = <const CDecimalType*> type.get()
-
-
-cdef class Field:
-    """
-    Represents a named field, with a data type, nullability, and optional
-    metadata
-
-    Notes
-    -----
-    Do not use this class's constructor directly; use pyarrow.field
-    """
-    def __cinit__(self):
-        pass
-
-    cdef init(self, const shared_ptr[CField]& field):
-        self.sp_field = field
-        self.field = field.get()
-        self.type = box_data_type(field.get().type())
-
-    def equals(self, Field other):
-        """
-        Test if this field is equal to the other
-        """
-        return self.field.Equals(deref(other.field))
-
-    def __str__(self):
-        self._check_null()
-        return 'pyarrow.Field<{0}>'.format(frombytes(self.field.ToString()))
-
-    def __repr__(self):
-        return self.__str__()
-
-    property nullable:
-
-        def __get__(self):
-            self._check_null()
-            return self.field.nullable()
-
-    property name:
-
-        def __get__(self):
-            self._check_null()
-            return frombytes(self.field.name())
-
-    property metadata:
-
-        def __get__(self):
-            self._check_null()
-            return box_metadata(self.field.metadata().get())
-
-    def _check_null(self):
-        if self.field == NULL:
-            raise ReferenceError(
-                'Field not initialized (references NULL pointer)')
-
-    def add_metadata(self, dict metadata):
-        """
-        Add metadata as dict of string keys and values to Field
-
-        Parameters
-        ----------
-        metadata : dict
-            Keys and values must be string-like / coercible to bytes
-
-        Returns
-        -------
-        field : pyarrow.Field
-        """
-        cdef shared_ptr[CKeyValueMetadata] c_meta
-        convert_metadata(metadata, &c_meta)
-
-        cdef shared_ptr[CField] new_field
-        with nogil:
-            check_status(self.field.AddMetadata(c_meta, &new_field))
-
-        return box_field(new_field)
-
-    def remove_metadata(self):
-        """
-        Create new field without metadata, if any
-
-        Returns
-        -------
-        field : pyarrow.Field
-        """
-        cdef shared_ptr[CField] new_field
-        with nogil:
-            new_field = self.field.RemoveMetadata()
-        return box_field(new_field)
-
-
-cdef class Schema:
-
-    def __cinit__(self):
-        pass
-
-    def __len__(self):
-        return self.schema.num_fields()
-
-    def __getitem__(self, i):
-        if i < 0 or i >= len(self):
-            raise IndexError("{0} is out of bounds".format(i))
-
-        cdef Field result = Field()
-        result.init(self.schema.field(i))
-        result.type = box_data_type(result.field.type())
-
-        return result
-
-    cdef init(self, const vector[shared_ptr[CField]]& fields):
-        self.schema = new CSchema(fields)
-        self.sp_schema.reset(self.schema)
-
-    cdef init_schema(self, const shared_ptr[CSchema]& schema):
-        self.schema = schema.get()
-        self.sp_schema = schema
-
-    property names:
-
-        def __get__(self):
-            cdef int i
-            result = []
-            for i in range(self.schema.num_fields()):
-                name = frombytes(self.schema.field(i).get().name())
-                result.append(name)
-            return result
-
-    property metadata:
-
-        def __get__(self):
-            return box_metadata(self.schema.metadata().get())
-
-    def equals(self, other):
-        """
-        Test if this schema is equal to the other
-        """
-        cdef Schema _other
-        _other = other
-
-        return self.sp_schema.get().Equals(deref(_other.schema))
-
-    def field_by_name(self, name):
-        """
-        Access a field by its name rather than the column index.
-
-        Parameters
-        ----------
-        name: str
-
-        Returns
-        -------
-        field: pyarrow.Field
-        """
-        return box_field(self.schema.GetFieldByName(tobytes(name)))
-
-    def add_metadata(self, dict metadata):
-        """
-        Add metadata as dict of string keys and values to Schema
-
-        Parameters
-        ----------
-        metadata : dict
-            Keys and values must be string-like / coercible to bytes
-
-        Returns
-        -------
-        schema : pyarrow.Schema
-        """
-        cdef shared_ptr[CKeyValueMetadata] c_meta
-        convert_metadata(metadata, &c_meta)
-
-        cdef shared_ptr[CSchema] new_schema
-        with nogil:
-            check_status(self.schema.AddMetadata(c_meta, &new_schema))
-
-        return box_schema(new_schema)
-
-    def remove_metadata(self):
-        """
-        Create new schema without metadata, if any
-
-        Returns
-        -------
-        schema : pyarrow.Schema
-        """
-        cdef shared_ptr[CSchema] new_schema
-        with nogil:
-            new_schema = self.schema.RemoveMetadata()
-        return box_schema(new_schema)
-
-    def __str__(self):
-        return frombytes(self.schema.ToString())
-
-    def __repr__(self):
-        return self.__str__()
-
-
-cdef box_metadata(const CKeyValueMetadata* metadata):
-    cdef unordered_map[c_string, c_string] result
-    if metadata != NULL:
-        metadata.ToUnorderedMap(&result)
-        return result
-    else:
-        return None
-
-
-cdef dict _type_cache = {}
-
-
-cdef DataType primitive_type(Type type):
-    if type in _type_cache:
-        return _type_cache[type]
-
-    cdef DataType out = DataType()
-    out.init(pyarrow.GetPrimitiveType(type))
-
-    _type_cache[type] = out
-    return out
-
-#------------------------------------------------------------
-# Type factory functions
-
-cdef int convert_metadata(dict metadata,
-                          shared_ptr[CKeyValueMetadata]* out) except -1:
-    cdef:
-        shared_ptr[CKeyValueMetadata] meta = (
-            make_shared[CKeyValueMetadata]())
-        c_string key, value
-
-    for py_key, py_value in metadata.items():
-        key = tobytes(py_key)
-        value = tobytes(py_value)
-        meta.get().Append(key, value)
-    out[0] = meta
-    return 0
-
-
-def field(name, DataType type, bint nullable=True, dict metadata=None):
-    """
-    Create a pyarrow.Field instance
-
-    Parameters
-    ----------
-    name : string or bytes
-    type : pyarrow.DataType
-    nullable : boolean, default True
-    metadata : dict, default None
-        Keys and values must be coercible to bytes
-
-    Returns
-    -------
-    field : pyarrow.Field
-    """
-    cdef:
-        shared_ptr[CKeyValueMetadata] c_meta
-        Field result = Field()
-
-    if metadata is not None:
-        convert_metadata(metadata, &c_meta)
-
-    result.sp_field.reset(new CField(tobytes(name), type.sp_type,
-                                     nullable, c_meta))
-    result.field = result.sp_field.get()
-    result.type = type
-    return result
-
-
-cdef set PRIMITIVE_TYPES = set([
-    _Type_NA, _Type_BOOL,
-    _Type_UINT8, _Type_INT8,
-    _Type_UINT16, _Type_INT16,
-    _Type_UINT32, _Type_INT32,
-    _Type_UINT64, _Type_INT64,
-    _Type_TIMESTAMP, _Type_DATE32,
-    _Type_DATE64,
-    _Type_HALF_FLOAT,
-    _Type_FLOAT,
-    _Type_DOUBLE])
-
-
-def null():
-    return primitive_type(_Type_NA)
-
-
-def bool_():
-    return primitive_type(_Type_BOOL)
-
-
-def uint8():
-    return primitive_type(_Type_UINT8)
-
-
-def int8():
-    return primitive_type(_Type_INT8)
-
-
-def uint16():
-    return primitive_type(_Type_UINT16)
-
-
-def int16():
-    return primitive_type(_Type_INT16)
-
-
-def uint32():
-    return primitive_type(_Type_UINT32)
-
-
-def int32():
-    return primitive_type(_Type_INT32)
-
-
-def uint64():
-    return primitive_type(_Type_UINT64)
-
-
-def int64():
-    return primitive_type(_Type_INT64)
-
-
-cdef dict _timestamp_type_cache = {}
-cdef dict _time_type_cache = {}
-
-
-cdef timeunit_to_string(TimeUnit unit):
-    if unit == TimeUnit_SECOND:
-        return 's'
-    elif unit == TimeUnit_MILLI:
-        return 'ms'
-    elif unit == TimeUnit_MICRO:
-        return 'us'
-    elif unit == TimeUnit_NANO:
-        return 'ns'
-
-
-def timestamp(unit_str, tz=None):
-    cdef:
-        TimeUnit unit
-        c_string c_timezone
-
-    if unit_str == "s":
-        unit = TimeUnit_SECOND
-    elif unit_str == 'ms':
-        unit = TimeUnit_MILLI
-    elif unit_str == 'us':
-        unit = TimeUnit_MICRO
-    elif unit_str == 'ns':
-        unit = TimeUnit_NANO
-    else:
-        raise ValueError('Invalid TimeUnit string')
-
-    cdef TimestampType out = TimestampType()
-
-    if tz is None:
-        out.init(ctimestamp(unit))
-        if unit in _timestamp_type_cache:
-            return _timestamp_type_cache[unit]
-        _timestamp_type_cache[unit] = out
-    else:
-        if not isinstance(tz, six.string_types):
-            tz = tz.zone
-
-        c_timezone = tobytes(tz)
-        out.init(ctimestamp(unit, c_timezone))
-
-    return out
-
-
-def time32(unit_str):
-    cdef:
-        TimeUnit unit
-        c_string c_timezone
-
-    if unit_str == "s":
-        unit = TimeUnit_SECOND
-    elif unit_str == 'ms':
-        unit = TimeUnit_MILLI
-    else:
-        raise ValueError('Invalid TimeUnit for time32: {}'.format(unit_str))
-
-    cdef Time32Type out
-    if unit in _time_type_cache:
-        return _time_type_cache[unit]
-    else:
-        out = Time32Type()
-        out.init(ctime32(unit))
-        _time_type_cache[unit] = out
-        return out
-
-
-def time64(unit_str):
-    cdef:
-        TimeUnit unit
-        c_string c_timezone
-
-    if unit_str == "us":
-        unit = TimeUnit_MICRO
-    elif unit_str == 'ns':
-        unit = TimeUnit_NANO
-    else:
-        raise ValueError('Invalid TimeUnit for time64: {}'.format(unit_str))
-
-    cdef Time64Type out
-    if unit in _time_type_cache:
-        return _time_type_cache[unit]
-    else:
-        out = Time64Type()
-        out.init(ctime64(unit))
-        _time_type_cache[unit] = out
-        return out
-
-
-def date32():
-    return primitive_type(_Type_DATE32)
-
-
-def date64():
-    return primitive_type(_Type_DATE64)
-
-
-def float16():
-    return primitive_type(_Type_HALF_FLOAT)
-
-
-def float32():
-    return primitive_type(_Type_FLOAT)
-
-
-def float64():
-    return primitive_type(_Type_DOUBLE)
-
-
-cpdef DataType decimal(int precision, int scale=0):
-    cdef shared_ptr[CDataType] decimal_type
-    decimal_type.reset(new CDecimalType(precision, scale))
-    return box_data_type(decimal_type)
-
-
-def string():
-    """
-    UTF8 string
-    """
-    return primitive_type(_Type_STRING)
-
-
-def binary(int length=-1):
-    """Binary (PyBytes-like) type
-
-    Parameters
-    ----------
-    length : int, optional, default -1
-        If length == -1 then return a variable length binary type. If length is
-        greater than or equal to 0 then return a fixed size binary type of
-        width `length`.
-    """
-    if length == -1:
-        return primitive_type(_Type_BINARY)
-
-    cdef shared_ptr[CDataType] fixed_size_binary_type
-    fixed_size_binary_type.reset(new CFixedSizeBinaryType(length))
-    return box_data_type(fixed_size_binary_type)
-
-
-def list_(DataType value_type):
-    cdef DataType out = DataType()
-    cdef shared_ptr[CDataType] list_type
-    list_type.reset(new CListType(value_type.sp_type))
-    out.init(list_type)
-    return out
-
-
-def dictionary(DataType index_type, Array dictionary):
-    """
-    Dictionary (categorical, or simply encoded) type
-    """
-    cdef DictionaryType out = DictionaryType()
-    cdef shared_ptr[CDataType] dict_type
-    dict_type.reset(new CDictionaryType(index_type.sp_type,
-                                        dictionary.sp_array))
-    out.init(dict_type)
-    return out
-
-
-def struct(fields):
-    """
-
-    """
-    cdef:
-        DataType out = DataType()
-        Field field
-        vector[shared_ptr[CField]] c_fields
-        cdef shared_ptr[CDataType] struct_type
-
-    for field in fields:
-        c_fields.push_back(field.sp_field)
-
-    struct_type.reset(new CStructType(c_fields))
-    out.init(struct_type)
-    return out
-
-
-def schema(fields):
-    """
-    Construct pyarrow.Schema from collection of fields
-
-    Parameters
-    ----------
-    field : list or iterable
-
-    Returns
-    -------
-    schema : pyarrow.Schema
-    """
-    cdef:
-        Schema result
-        Field field
-        vector[shared_ptr[CField]] c_fields
-
-    for i, field in enumerate(fields):
-        c_fields.push_back(field.sp_field)
-
-    result = Schema()
-    result.init(c_fields)
-    return result
-
-
-cdef DataType box_data_type(const shared_ptr[CDataType]& type):
-    cdef:
-        DataType out
-
-    if type.get() == NULL:
-        return None
-
-    if type.get().id() == _Type_DICTIONARY:
-        out = DictionaryType()
-    elif type.get().id() == _Type_TIMESTAMP:
-        out = TimestampType()
-    elif type.get().id() == _Type_FIXED_SIZE_BINARY:
-        out = FixedSizeBinaryType()
-    elif type.get().id() == _Type_DECIMAL:
-        out = DecimalType()
-    else:
-        out = DataType()
-
-    out.init(type)
-    return out
-
-cdef Field box_field(const shared_ptr[CField]& field):
-    if field.get() == NULL:
-        return None
-    cdef Field out = Field()
-    out.init(field)
-    return out
-
-cdef Schema box_schema(const shared_ptr[CSchema]& type):
-    cdef Schema out = Schema()
-    out.init_schema(type)
-    return out
-
-
-def from_numpy_dtype(object dtype):
-    """
-    Convert NumPy dtype to pyarrow.DataType
-    """
-    cdef shared_ptr[CDataType] c_type
-    with nogil:
-        check_status(pyarrow.NumPyDtypeToArrow(dtype, &c_type))
-
-    return box_data_type(c_type)
-
-
-NA = None
-
-
-cdef class NAType(Scalar):
-
-    def __cinit__(self):
-        global NA
-        if NA is not None:
-            raise Exception('Cannot create multiple NAType instances')
-
-        self.type = null()
-
-    def __repr__(self):
-        return 'NA'
-
-    def as_py(self):
-        return None
-
-
-NA = NAType()
-
-
-cdef class ArrayValue(Scalar):
-
-    cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-                   int64_t index):
-        self.type = type
-        self.index = index
-        self._set_array(sp_array)
-
-    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-        self.sp_array = sp_array
-
-    def __repr__(self):
-        if hasattr(self, 'as_py'):
-            return repr(self.as_py())
-        else:
-            return super(Scalar, self).__repr__()
-
-
-cdef class BooleanValue(ArrayValue):
-
-    def as_py(self):
-        cdef CBooleanArray* ap = <CBooleanArray*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int8Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt8Array* ap = <CInt8Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt8Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt8Array* ap = <CUInt8Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int16Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt16Array* ap = <CInt16Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt16Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt16Array* ap = <CUInt16Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int32Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt32Array* ap = <CInt32Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt32Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt32Array* ap = <CUInt32Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int64Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt64Array* ap = <CInt64Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt64Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt64Array* ap = <CUInt64Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Date32Value(ArrayValue):
-
-    def as_py(self):
-        cdef CDate32Array* ap = <CDate32Array*> self.sp_array.get()
-
-        # Shift to seconds since epoch
-        return datetime.datetime.utcfromtimestamp(
-            int(ap.Value(self.index)) * 86400).date()
-
-
-cdef class Date64Value(ArrayValue):
-
-    def as_py(self):
-        cdef CDate64Array* ap = <CDate64Array*> self.sp_array.get()
-        return datetime.datetime.utcfromtimestamp(
-            ap.Value(self.index) / 1000).date()
-
-
-cdef class TimestampValue(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            CTimestampArray* ap = <CTimestampArray*> self.sp_array.get()
-            CTimestampType* dtype = <CTimestampType*>ap.type().get()
-            int64_t val = ap.Value(self.index)
-
-        timezone = None
-        tzinfo = None
-        if dtype.timezone().size() > 0:
-            timezone = frombytes(dtype.timezone())
-            import pytz
-            tzinfo = pytz.timezone(timezone)
-
-        try:
-            pd = _pandas()
-            if dtype.unit() == TimeUnit_SECOND:
-                val = val * 1000000000
-            elif dtype.unit() == TimeUnit_MILLI:
-                val = val * 1000000
-            elif dtype.unit() == TimeUnit_MICRO:
-                val = val * 1000
-            return pd.Timestamp(val, tz=tzinfo)
-        except ImportError:
-            if dtype.unit() == TimeUnit_SECOND:
-                result = datetime.datetime.utcfromtimestamp(val)
-            elif dtype.unit() == TimeUnit_MILLI:
-                result = datetime.datetime.utcfromtimestamp(float(val) / 1000)
-            elif dtype.unit() == TimeUnit_MICRO:
-                result = datetime.datetime.utcfromtimestamp(
-                    float(val) / 1000000)
-            else:
-                # TimeUnit_NANO
-                raise NotImplementedError("Cannot convert nanosecond "
-                                          "timestamps without pandas")
-            if timezone is not None:
-                result = result.replace(tzinfo=tzinfo)
-            return result
-
-
-cdef class FloatValue(ArrayValue):
-
-    def as_py(self):
-        cdef CFloatArray* ap = <CFloatArray*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class DoubleValue(ArrayValue):
-
-    def as_py(self):
-        cdef CDoubleArray* ap = <CDoubleArray*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class DecimalValue(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            CDecimalArray* ap = <CDecimalArray*> self.sp_array.get()
-            c_string s = ap.FormatValue(self.index)
-        return _pydecimal.Decimal(s.decode('utf8'))
-
-
-cdef class StringValue(ArrayValue):
-
-    def as_py(self):
-        cdef CStringArray* ap = <CStringArray*> self.sp_array.get()
-        return ap.GetString(self.index).decode('utf-8')
-
-
-cdef class BinaryValue(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            const uint8_t* ptr
-            int32_t length
-            CBinaryArray* ap = <CBinaryArray*> self.sp_array.get()
-
-        ptr = ap.GetValue(self.index, &length)
-        return cp.PyBytes_FromStringAndSize(<const char*>(ptr), length)
-
-
-cdef class ListValue(ArrayValue):
-
-    def __len__(self):
-        return self.ap.value_length(self.index)
-
-    def __getitem__(self, i):
-        return self.getitem(i)
-
-    def __iter__(self):
-        for i in range(len(self)):
-            yield self.getitem(i)
-        raise StopIteration
-
-    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-        self.sp_array = sp_array
-        self.ap = <CListArray*> sp_array.get()
-        self.value_type = box_data_type(self.ap.value_type())
-
-    cdef getitem(self, int64_t i):
-        cdef int64_t j = self.ap.value_offset(self.index) + i
-        return box_scalar(self.value_type, self.ap.values(), j)
-
-    def as_py(self):
-        cdef:
-            int64_t j
-            list result = []
-
-        for j in range(len(self)):
-            result.append(self.getitem(j).as_py())
-
-        return result
-
-
-cdef class FixedSizeBinaryValue(ArrayValue):
-
-    def as_py(self):
-        cdef:
-            CFixedSizeBinaryArray* ap
-            CFixedSizeBinaryType* ap_type
-            int32_t length
-            const char* data
-        ap = <CFixedSizeBinaryArray*> self.sp_array.get()
-        ap_type = <CFixedSizeBinaryType*> ap.type().get()
-        length = ap_type.byte_width()
-        data = <const char*> ap.GetValue(self.index)
-        return cp.PyBytes_FromStringAndSize(data, length)
-
-
-
-cdef dict _scalar_classes = {
-    _Type_BOOL: BooleanValue,
-    _Type_UINT8: Int8Value,
-    _Type_UINT16: Int16Value,
-    _Type_UINT32: Int32Value,
-    _Type_UINT64: Int64Value,
-    _Type_INT8: Int8Value,
-    _Type_INT16: Int16Value,
-    _Type_INT32: Int32Value,
-    _Type_INT64: Int64Value,
-    _Type_DATE32: Date32Value,
-    _Type_DATE64: Date64Value,
-    _Type_TIMESTAMP: TimestampValue,
-    _Type_FLOAT: FloatValue,
-    _Type_DOUBLE: DoubleValue,
-    _Type_LIST: ListValue,
-    _Type_BINARY: BinaryValue,
-    _Type_STRING: StringValue,
-    _Type_FIXED_SIZE_BINARY: FixedSizeBinaryValue,
-    _Type_DECIMAL: DecimalValue,
-}
-
-cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,
-                       int64_t index):
-    cdef ArrayValue val
-    if type.type.id() == _Type_NA:
-        return NA
-    elif sp_array.get().IsNull(index):
-        return NA
-    else:
-        val = _scalar_classes[type.type.id()]()
-        val.init(type, sp_array, index)
-        return val
-
-
-cdef maybe_coerce_datetime64(values, dtype, DataType type,
-                             timestamps_to_ms=False):
-
-    from pyarrow.compat import DatetimeTZDtype
-
-    if values.dtype.type != np.datetime64:
-        return values, type
-
-    coerce_ms = timestamps_to_ms and values.dtype != 'datetime64[ms]'
-
-    if coerce_ms:
-        values = values.astype('datetime64[ms]')
-
-    if isinstance(dtype, DatetimeTZDtype):
-        tz = dtype.tz
-        unit = 'ms' if coerce_ms else dtype.unit
-        type = timestamp(unit, tz)
-    elif type is None:
-        # Trust the NumPy dtype
-        type = from_numpy_dtype(values.dtype)
-
-    return values, type
-
-
-
-def array(object sequence, DataType type=None, MemoryPool memory_pool=None):
-    """
-    Create pyarrow.Array instance from a Python sequence
-
-    Parameters
-    ----------
-    sequence : sequence-like object of Python objects
-    type : pyarrow.DataType, optional
-        If not passed, will be inferred from the data
-    memory_pool : pyarrow.MemoryPool, optional
-        If not passed, will allocate memory from the currently-set default
-        memory pool
-
-    Returns
-    -------
-    array : pyarrow.Array
-    """
-    cdef:
-       shared_ptr[CArray] sp_array
-       CMemoryPool* pool
-
-    pool = maybe_unbox_memory_pool(memory_pool)
-    if type is None:
-        check_status(pyarrow.ConvertPySequence(sequence, pool, &sp_array))
-    else:
-        check_status(
-            pyarrow.ConvertPySequence(
-                sequence, pool, &sp_array, type.sp_type
-            )
-        )
-
-    return box_array(sp_array)
-
-
-
-cdef class Array:
-
-    cdef init(self, const shared_ptr[CArray]& sp_array):
-        self.sp_array = sp_array
-        self.ap = sp_array.get()
-        self.type = box_data_type(self.sp_array.get().type())
-
-    @staticmethod
-    def from_pandas(obj, mask=None, DataType type=None,
-                    timestamps_to_ms=False,
-                    MemoryPool memory_pool=None):
-        """
-        Convert pandas.Series to an Arrow Array.
-
-        Parameters
-        ----------
-        series : pandas.Series or numpy.ndarray
-
-        mask : pandas.Series or numpy.ndarray, optional
-            boolean mask if the object is valid or null
-
-        type : pyarrow.DataType
-            Explicit type to attempt to coerce to
-
-        timestamps_to_ms : bool, optional
-            Convert datetime columns to ms resolution. This is needed for
-            compatibility with other functionality like Parquet I/O which
-            only supports milliseconds.
-
-        memory_pool: MemoryPool, optional
-            Specific memory pool to use to allocate the resulting Arrow array.
-
-        Notes
-        -----
-        Localized timestamps will currently be returned as UTC (pandas's native
-        representation).  Timezone-naive data will be implicitly interpreted as
-        UTC.
-
-        Examples
-        --------
-
-        >>> import pandas as pd
-        >>> import pyarrow as pa
-        >>> pa.Array.from_pandas(pd.Series([1, 2]))
-        <pyarrow.array.Int64Array object at 0x7f674e4c0e10>
-        [
-          1,
-          2
-        ]
-
-        >>> import numpy as np
-        >>> pa.Array.from_pandas(pd.Series([1, 2]), np.array([0, 1],
-        ... dtype=bool))
-        <pyarrow.array.Int64Array object at 0x7f9019e11208>
-        [
-          1,
-          NA
-        ]
-
-        Returns
-        -------
-        pyarrow.array.Array
-        """
-        cdef:
-            shared_ptr[CArray] out
-            shared_ptr[CDataType] c_type
-            CMemoryPool* pool
-
-        if mask is not None:
-            mask = get_series_values(mask)
-
-        values = get_series_values(obj)
-        pool = maybe_unbox_memory_pool(memory_pool)
-
-        if isinstance(values, Categorical):
-            return DictionaryArray.from_arrays(
-                values.codes, values.categories.values,
-                mask=mask, memory_pool=memory_pool)
-        elif values.dtype == object:
-            # Object dtype undergoes a different conversion path as more type
-            # inference may be needed
-            if type is not None:
-                c_type = type.sp_type
-            with nogil:
-                check_status(pyarrow.PandasObjectsToArrow(
-                    pool, values, mask, c_type, &out))
-        else:
-            values, type = maybe_coerce_datetime64(
-                values, obj.dtype, type, timestamps_to_ms=timestamps_to_ms)
-
-            if type is None:
-                check_status(pyarrow.NumPyDtypeToArrow(values.dtype, &c_type))
-            else:
-                c_type = type.sp_type
-
-            with nogil:
-                check_status(pyarrow.PandasToArrow(
-                    pool, values, mask, c_type, &out))
-
-        return box_array(out)
-
-    property null_count:
-
-        def __get__(self):
-            return self.sp_array.get().null_count()
-
-    def __iter__(self):
-        """
-        Yield each element of the array, in order, as returned by getitem.
-        """
-        # No explicit ``raise StopIteration``: a generator signals exhaustion
-        # simply by returning, and under PEP 479 an explicit raise inside a
-        # generator body is turned into a RuntimeError.
-        for i in range(len(self)):
-            yield self.getitem(i)
-
-    def __repr__(self):
-        # First line: the default object repr. Remaining lines: the values as
-        # rendered by pyarrow.formatting.array_format with a window of 10.
-        from pyarrow.formatting import array_format
-        header = object.__repr__(self)
-        body = array_format(self, window=10)
-        return '{0}\n{1}'.format(header, body)
-
-    def equals(Array self, Array other not None):
-        """
-        Return the result of the C++ ``Equals`` comparison between this array
-        and `other`.
-
-        Parameters
-        ----------
-        other : Array
-            Must not be None. ``other.ap`` is dereferenced without any check,
-            so None is rejected at the call boundary (TypeError) instead of
-            reaching the pointer dereference.
-        """
-        return self.ap.Equals(deref(other.ap))
-
-    def __len__(self):
-        # An Array whose shared_ptr holds no C++ array reports length 0.
-        cdef CArray* raw = self.sp_array.get()
-        if raw == NULL:
-            return 0
-        return raw.length()
-
-    def isnull(self):
-        """
-        Not implemented yet.
-
-        Raises
-        ------
-        NotImplementedError
-            Always.
-        """
-        # ``NotImplemented`` is the binary-operator sentinel, not an exception
-        # class; raising it fails with an unrelated TypeError.
-        raise NotImplementedError
-
-    def __getitem__(self, key):
-        """
-        Return the element at integer position `key`, or a zero-copy slice
-        when `key` is a slice object.
-
-        Negative positions count from the end, following Python sequence
-        semantics. Only slices with step 1 are supported.
-
-        Raises
-        ------
-        IndexError
-            If a slice step other than 1 is requested, or an integer position
-            falls outside the array.
-        """
-        cdef:
-            Py_ssize_t n = len(self)
-
-        if PySlice_Check(key):
-            step = key.step or 1
-            if step != 1:
-                raise IndexError('only slices with step 1 supported')
-
-            # slice.indices resolves negative bounds once and clamps both
-            # bounds into [0, n], so an empty array cannot loop forever and a
-            # bound below -n cannot wrap around to an unrelated position.
-            start, stop, _ = slice(key.start, key.stop).indices(n)
-            # A stop before start yields an empty slice, never a negative
-            # length.
-            return self.slice(start, max(stop - start, 0))
-
-        # Resolve a negative position exactly once: repeatedly adding n never
-        # terminates when n == 0 and silently wraps when key < -n.
-        if key < 0:
-            key += n
-        if key < 0 or key >= n:
-            raise IndexError('index out of bounds')
-
-        return self.getitem(key)
-
-    cdef getitem(self, int64_t i):
-        # Box element i via box_scalar using this array's type. No bounds
-        # check is performed in this method.
-        return box_scalar(self.type, self.sp_array, i)
-
-    def slice(self, offset=0, length=None):
-        """
-        Compute zero-copy slice of this array
-
-        Parameters
-        ----------
-        offset : int, default 0
-            Offset from start of array to slice
-        length : int, default None
-            Length of slice (default is until end of Array starting from
-            offset)
-
-        Returns
-        -------
-        sliced : Array
-
-        Raises
-        ------
-        IndexError
-            If offset is negative
-        """
-        cdef:
-            shared_ptr[CArray] result
-
-        if offset < 0:
-            raise IndexError('Offset must be non-negative')
-
-        # Use the one-argument C++ overload when no length was given.
-        if length is None:
-            result = self.ap.Slice(offset)
-        else:
-            result = self.ap.Slice(offset, length)
-
-        return box_array(result)
-
-    def to_pandas(self):
-        """
-        Convert to an array object suitable for use in pandas
-
-        See also
-        --------
-        Column.to_pandas
-        Table.to_pandas
-        RecordBatch.to_pandas
-        """
-        cdef:
-            PyObject* out
-
-        # The conversion call runs with the GIL released; its status is passed
-        # through check_status.
-        with nogil:
-            check_status(
-                pyarrow.ConvertArrayToPandas(self.sp_array, self, &out))
-        # wrap_array_output turns a dict result into a Categorical and returns
-        # any other result unchanged.
-        return wrap_array_output(out)
-
-    def to_pylist(self):
-        """
-        Convert to a list of native Python objects.
-        """
-        converted = []
-        for scalar in self:
-            converted.append(scalar.as_py())
-        return converted
-
-
-cdef class Tensor:
-    """
-    Python wrapper around a C++ tensor held through a shared_ptr.
-    """
-
-    cdef init(self, const shared_ptr[CTensor]& sp_tensor):
-        # Keep the shared_ptr, and cache the raw pointer and the boxed data
-        # type used by the accessors below.
-        self.sp_tensor = sp_tensor
-        self.tp = sp_tensor.get()
-        self.type = box_data_type(self.tp.type())
-
-    def __repr__(self):
-        return """<pyarrow.Tensor>
-type: {0}
-shape: {1}
-strides: {2}""".format(self.type, self.shape, self.strides)
-
-    @staticmethod
-    def from_numpy(obj):
-        """
-        Create a Tensor from `obj` (handed to NdarrayToTensor) using the
-        default memory pool.
-        """
-        cdef shared_ptr[CTensor] ctensor
-        check_status(pyarrow.NdarrayToTensor(default_memory_pool(),
-                                             obj, &ctensor))
-        return box_tensor(ctensor)
-
-    def to_numpy(self):
-        """
-        Convert arrow::Tensor to numpy.ndarray with zero copy
-        """
-        cdef:
-            PyObject* out
-
-        check_status(pyarrow.TensorToNdarray(deref(self.tp), self, &out))
-        return PyObject_to_object(out)
-
-    def equals(self, Tensor other not None):
-        """
-        Return true if the tensors contains exactly equal data
-
-        `other` must not be None: ``other.tp`` is dereferenced without any
-        check, so None is rejected at the call boundary (TypeError).
-        """
-        return self.tp.Equals(deref(other.tp))
-
-    property is_mutable:
-
-        def __get__(self):
-            return self.tp.is_mutable()
-
-    property is_contiguous:
-
-        def __get__(self):
-            return self.tp.is_contiguous()
-
-    property ndim:
-
-        def __get__(self):
-            return self.tp.ndim()
-
-    property size:
-
-        def __get__(self):
-            return self.tp.size()
-
-    property shape:
-        # The C++ shape vector copied into a new Python list.
-
-        def __get__(self):
-            cdef size_t i
-            py_shape = []
-            for i in range(self.tp.shape().size()):
-                py_shape.append(self.tp.shape()[i])
-            return py_shape
-
-    property strides:
-        # The C++ strides vector copied into a new Python list.
-
-        def __get__(self):
-            cdef size_t i
-            py_strides = []
-            for i in range(self.tp.strides().size()):
-                py_strides.append(self.tp.strides()[i])
-            return py_strides
-
-
-
-cdef wrap_array_output(PyObject* output):
-    # A dict result carries 'indices' and 'dictionary' and is rebuilt as a
-    # Categorical; every other result is handed back unchanged.
-    cdef object converted = PyObject_to_object(output)
-
-    if not isinstance(converted, dict):
-        return converted
-
-    return Categorical(converted['indices'],
-                       categories=converted['dictionary'],
-                       fastpath=True)
-
-
-cdef class NullArray(Array):
-    """Array class registered for the NA type id."""
-    pass
-
-
-cdef class BooleanArray(Array):
-    """Array class registered for the BOOL type id."""
-    pass
-
-
-cdef class NumericArray(Array):
-    """Common base class of the numeric, date, time and timestamp arrays."""
-    pass
-
-
-cdef class IntegerArray(NumericArray):
-    """Common base class of the signed and unsigned integer arrays."""
-    pass
-
-
-cdef class FloatingPointArray(NumericArray):
-    """Common base class of the floating point arrays."""
-    pass
-
-
-cdef class Int8Array(IntegerArray):
-    """Array class registered for the INT8 type id."""
-    pass
-
-
-cdef class UInt8Array(IntegerArray):
-    """Array class registered for the UINT8 type id."""
-    pass
-
-
-cdef class Int16Array(IntegerArray):
-    """Array class registered for the INT16 type id."""
-    pass
-
-
-cdef class UInt16Array(IntegerArray):
-    """Array class registered for the UINT16 type id."""
-    pass
-
-
-cdef class Int32Array(IntegerArray):
-    """Array class registered for the INT32 type id."""
-    pass
-
-
-cdef class UInt32Array(IntegerArray):
-    """Array class registered for the UINT32 type id."""
-    pass
-
-
-cdef class Int64Array(IntegerArray):
-    """Array class registered for the INT64 type id."""
-    pass
-
-
-cdef class UInt64Array(IntegerArray):
-    """Array class registered for the UINT64 type id."""
-    pass
-
-
-cdef class Date32Array(NumericArray):
-    """Array class registered for the DATE32 type id."""
-    pass
-
-
-cdef class Date64Array(NumericArray):
-    """Array class registered for the DATE64 type id."""
-    pass
-
-
-cdef class TimestampArray(NumericArray):
-    """Array class registered for the TIMESTAMP type id."""
-    pass
-
-
-cdef class Time32Array(NumericArray):
-    """Array class registered for the TIME32 type id."""
-    pass
-
-
-cdef class Time64Array(NumericArray):
-    """Array class registered for the TIME64 type id."""
-    pass
-
-
-cdef class FloatArray(FloatingPointArray):
-    """Array class registered for the FLOAT type id."""
-    pass
-
-
-cdef class DoubleArray(FloatingPointArray):
-    """Array class registered for the DOUBLE type id."""
-    pass
-
-
-cdef class FixedSizeBinaryArray(Array):
-    """Array class registered for the FIXED_SIZE_BINARY type id."""
-    pass
-
-
-cdef class DecimalArray(FixedSizeBinaryArray):
-    """Array class registered for the DECIMAL type id."""
-    pass
-
-
-cdef class ListArray(Array):
-    """Array class registered for the LIST type id."""
-    pass
-
-
-cdef class StringArray(Array):
-    """Array class registered for the STRING type id."""
-    pass
-
-
-cdef class BinaryArray(Array):
-    """Array class registered for the BINARY type id."""
-    pass
-
-
-cdef class DictionaryArray(Array):
-    """
-    Array of integer indices paired with an array of dictionary values.
-    """
-
-    cdef getitem(self, int64_t i):
-        # Look up position i in the indices; a null index is returned as is,
-        # any other index selects the scalar from the dictionary values.
-        cdef Array values = self.dictionary
-        code = self.indices[i]
-        if code is not NA:
-            return box_scalar(values.type, values.sp_array,
-                              code.as_py())
-        return code
-
-    property dictionary:
-        # Boxed on first access and cached in self._dictionary afterwards.
-
-        def __get__(self):
-            cdef CDictionaryArray* raw = <CDictionaryArray*>(self.ap)
-
-            if self._dictionary is not None:
-                return self._dictionary
-
-            self._dictionary = box_array(raw.dictionary())
-            return self._dictionary
-
-    property indices:
-        # Boxed on first access and cached in self._indices afterwards.
-
-        def __get__(self):
-            cdef CDictionaryArray* raw = <CDictionaryArray*>(self.ap)
-
-            if self._indices is not None:
-                return self._indices
-
-            self._indices = box_array(raw.indices())
-            return self._indices
-
-    @staticmethod
-    def from_arrays(indices, dictionary, mask=None,
-                    MemoryPool memory_pool=None):
-        """
-        Build a DictionaryArray from an array of indices and the array of
-        dictionary values they refer to.
-
-        Parameters
-        ----------
-        indices : Array, ndarray or pandas.Series, integer type
-            When not already an Array, entries equal to -1 are treated as
-            null in addition to those flagged by `mask`
-        dictionary : Array, ndarray or pandas.Series
-        mask : ndarray or pandas.Series, boolean type
-            True values indicate that indices are actually null. Not
-            supported when `indices` is already an Array
-        memory_pool : MemoryPool, default None
-            Forwarded to Array.from_pandas for inputs that need conversion
-
-        Returns
-        -------
-        dict_array : DictionaryArray
-        """
-        cdef:
-            Array arrow_indices, arrow_dictionary
-            DictionaryArray result
-            shared_ptr[CDataType] c_type
-            shared_ptr[CArray] c_result
-
-        if isinstance(indices, Array):
-            if mask is not None:
-                raise NotImplementedError(
-                    "mask not implemented with Arrow array inputs yet")
-            arrow_indices = indices
-        else:
-            missing = indices == -1
-            mask = missing if mask is None else mask | missing
-            arrow_indices = Array.from_pandas(indices, mask=mask,
-                                              memory_pool=memory_pool)
-
-        if not isinstance(dictionary, Array):
-            arrow_dictionary = Array.from_pandas(dictionary,
-                                                 memory_pool=memory_pool)
-        else:
-            arrow_dictionary = dictionary
-
-        if not isinstance(arrow_indices, IntegerArray):
-            raise ValueError('Indices must be integer type')
-
-        c_type.reset(new CDictionaryType(arrow_indices.type.sp_type,
-                                         arrow_dictionary.sp_array))
-        c_result.reset(new CDictionaryArray(c_type, arrow_indices.sp_array))
-
-        result = DictionaryArray()
-        result.init(c_result)
-        return result
-
-
-# Maps a type id to the Python Array subclass that box_array instantiates for
-# it. A type id missing from this table makes box_array fail with KeyError.
-cdef dict _array_classes = {
-    _Type_NA: NullArray,
-    _Type_BOOL: BooleanArray,
-    _Type_UINT8: UInt8Array,
-    _Type_UINT16: UInt16Array,
-    _Type_UINT32: UInt32Array,
-    _Type_UINT64: UInt64Array,
-    _Type_INT8: Int8Array,
-    _Type_INT16: Int16Array,
-    _Type_INT32: Int32Array,
-    _Type_INT64: Int64Array,
-    _Type_DATE32: Date32Array,
-    _Type_DATE64: Date64Array,
-    _Type_TIMESTAMP: TimestampArray,
-    _Type_TIME32: Time32Array,
-    _Type_TIME64: Time64Array,
-    _Type_FLOAT: FloatArray,
-    _Type_DOUBLE: DoubleArray,
-    _Type_LIST: ListArray,
-    _Type_BINARY: BinaryArray,
-    _Type_STRING: StringArray,
-    _Type_DICTIONARY: DictionaryArray,
-    _Type_FIXED_SIZE_BINARY: FixedSizeBinaryArray,
-    _Type_DECIMAL: DecimalArray,
-}
-
-cdef object box_array(const shared_ptr[CArray]& sp_array):
-    # Wrap a C++ array in the Python Array subclass registered for its type
-    # id in _array_classes. Raises ValueError when the array or its data type
-    # is NULL.
-    cdef:
-        CDataType* data_type
-        Array arr
-
-    if sp_array.get() == NULL:
-        raise ValueError('Array was NULL')
-
-    data_type = sp_array.get().type().get()
-    if data_type == NULL:
-        raise ValueError('Array data type was NULL')
-
-    array_class = _array_classes[data_type.id()]
-    arr = array_class()
-    arr.init(sp_array)
-    return arr
-
-
-cdef object box_tensor(const shared_ptr[CTensor]& sp_tensor):
-    # Wrap a C++ tensor in a new Python Tensor object. Raises ValueError when
-    # the shared_ptr holds no tensor.
-    cdef Tensor wrapper
-
-    if sp_tensor.get() == NULL:
-        raise ValueError('Tensor was NULL')
-
-    wrapper = Tensor()
-    wrapper.init(sp_tensor)
-    return wrapper
-
-
-cdef object get_series_values(object obj):
-    # Return the values behind obj: a Series yields its .values, an ndarray
-    # is passed through, and anything else is first wrapped in a Series.
-    if isinstance(obj, PandasSeries):
-        return obj.values
-    if isinstance(obj, np.ndarray):
-        return obj
-    return PandasSeries(obj).values

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/python/pyarrow/_error.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/_error.pxd b/python/pyarrow/_error.pxd
deleted file mode 100644
index 4fb46c2..0000000
--- a/python/pyarrow/_error.pxd
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from pyarrow.includes.libarrow cimport CStatus
-
-# Declared nogil so it can be called without holding the GIL; a return value
-# of -1 signals that a Python exception has been set.
-cdef int check_status(const CStatus& status) nogil except -1

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/python/pyarrow/_error.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_error.pyx b/python/pyarrow/_error.pyx
deleted file mode 100644
index 259aeb0..0000000
--- a/python/pyarrow/_error.pyx
+++ /dev/null
@@ -1,70 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from pyarrow.includes.libarrow cimport CStatus
-from pyarrow.includes.common cimport c_string
-from pyarrow.compat import frombytes
-
-
-class ArrowException(Exception):
-    """Base class of the exceptions raised by check_status."""
-    pass
-
-
-class ArrowInvalid(ValueError, ArrowException):
-    """Raised by check_status when the status reports IsInvalid()."""
-    pass
-
-
-class ArrowMemoryError(MemoryError, ArrowException):
-    """Raised by check_status when the status reports IsOutOfMemory()."""
-    pass
-
-
-class ArrowIOError(IOError, ArrowException):
-    """Raised by check_status when the status reports IsIOError()."""
-    pass
-
-
-class ArrowKeyError(KeyError, ArrowException):
-    """Raised by check_status when the status reports IsKeyError()."""
-    pass
-
-
-class ArrowTypeError(TypeError, ArrowException):
-    """Raised by check_status when the status reports IsTypeError()."""
-    pass
-
-
-class ArrowNotImplementedError(NotImplementedError, ArrowException):
-    """Raised by check_status when the status reports IsNotImplemented()."""
-    pass
-
-
-cdef int check_status(const CStatus& status) nogil except -1:
-    # Return 0 for an OK status; otherwise acquire the GIL and raise the
-    # exception class matching the status, with the status text as message.
-    if status.ok():
-        return 0
-
-    with gil:
-        message = frombytes(status.ToString())
-        if status.IsInvalid():
-            exc_class = ArrowInvalid
-        elif status.IsIOError():
-            exc_class = ArrowIOError
-        elif status.IsOutOfMemory():
-            exc_class = ArrowMemoryError
-        elif status.IsKeyError():
-            exc_class = ArrowKeyError
-        elif status.IsNotImplemented():
-            exc_class = ArrowNotImplementedError
-        elif status.IsTypeError():
-            exc_class = ArrowTypeError
-        else:
-            # Any other non-OK status falls back to the base exception.
-            exc_class = ArrowException
-        raise exc_class(message)

http://git-wip-us.apache.org/repos/asf/arrow/blob/9e875a68/python/pyarrow/_io.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/_io.pxd b/python/pyarrow/_io.pxd
deleted file mode 100644
index 0c37a09..0000000
--- a/python/pyarrow/_io.pxd
+++ /dev/null
@@ -1,50 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport *
-
-
-cdef class Buffer:
-    # Extension type holding a shared_ptr to a C++ buffer.
-    cdef:
-        shared_ptr[CBuffer] buffer
-        # One-element arrays; presumably storage for a one-dimensional
-        # buffer-protocol view -- TODO confirm against the implementation.
-        Py_ssize_t shape[1]
-        Py_ssize_t strides[1]
-
-    # Attach the given C++ buffer to this object (implemented in the .pyx).
-    cdef init(self, const shared_ptr[CBuffer]& buffer)
-
-
-cdef class NativeFile:
-    # Extension type holding a readable and/or writable C++ file handle.
-    cdef:
-        # Handle used for reading and handle used for writing.
-        shared_ptr[RandomAccessFile] rd_file
-        shared_ptr[OutputStream] wr_file
-        # State flags. NOTE(review): their exact semantics are set by the
-        # implementation, which is not visible in this declaration file.
-        bint is_readable
-        bint is_writeable
-        bint is_open
-        bint own_file
-
-    # By implementing these "virtual" functions (all functions in Cython
-    # extension classes are technically virtual in the C++ sense) we can expose
-    # the arrow::io abstract file interfaces to other components throughout the
-    # suite of Arrow C++ libraries
-    cdef read_handle(self, shared_ptr[RandomAccessFile]* file)
-    cdef write_handle(self, shared_ptr[OutputStream]* file)
-
-# Module-level helpers that take a Python `source` object and an output
-# pointer for a read or write handle. NOTE(review): the accepted source types
-# are defined by the implementation, which is not visible here.
-cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader)
-cdef get_writer(object source, shared_ptr[OutputStream]* writer)