You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/10 00:45:11 UTC

[1/2] arrow git commit: ARROW-54: [Python] Rename package to "pyarrow"

Repository: arrow
Updated Branches:
  refs/heads/master 83675273b -> 6fdcd4943


http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
new file mode 100644
index 0000000..baba112
--- /dev/null
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+
+cdef extern from "arrow/api.h" namespace "arrow" nogil:
+
+    enum LogicalType" arrow::LogicalType::type":
+        LogicalType_NA" arrow::LogicalType::NA"
+
+        LogicalType_BOOL" arrow::LogicalType::BOOL"
+
+        LogicalType_UINT8" arrow::LogicalType::UINT8"
+        LogicalType_INT8" arrow::LogicalType::INT8"
+        LogicalType_UINT16" arrow::LogicalType::UINT16"
+        LogicalType_INT16" arrow::LogicalType::INT16"
+        LogicalType_UINT32" arrow::LogicalType::UINT32"
+        LogicalType_INT32" arrow::LogicalType::INT32"
+        LogicalType_UINT64" arrow::LogicalType::UINT64"
+        LogicalType_INT64" arrow::LogicalType::INT64"
+
+        LogicalType_FLOAT" arrow::LogicalType::FLOAT"
+        LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
+
+        LogicalType_STRING" arrow::LogicalType::STRING"
+
+        LogicalType_LIST" arrow::LogicalType::LIST"
+        LogicalType_STRUCT" arrow::LogicalType::STRUCT"
+
+    cdef cppclass CDataType" arrow::DataType":
+        LogicalType type
+        c_bool nullable
+
+        c_bool Equals(const CDataType* other)
+
+        c_string ToString()
+
+    cdef cppclass MemoryPool" arrow::MemoryPool":
+        int64_t bytes_allocated()
+
+    cdef cppclass CListType" arrow::ListType"(CDataType):
+        CListType(const shared_ptr[CDataType]& value_type,
+                  c_bool nullable)
+
+    cdef cppclass CStringType" arrow::StringType"(CDataType):
+        pass
+
+    cdef cppclass CField" arrow::Field":
+        c_string name
+        shared_ptr[CDataType] type
+
+        CField(const c_string& name, const shared_ptr[CDataType]& type)
+
+    cdef cppclass CStructType" arrow::StructType"(CDataType):
+        CStructType(const vector[shared_ptr[CField]]& fields,
+                    c_bool nullable)
+
+    cdef cppclass CSchema" arrow::Schema":  # ctor takes the field list
+        CSchema(const vector[shared_ptr[CField]]& fields)
+
+    cdef cppclass CArray" arrow::Array":
+        const shared_ptr[CDataType]& type()
+
+        int32_t length()
+        int32_t null_count()
+        LogicalType logical_type()
+
+        c_bool IsNull(int i)
+
+    cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
+        uint8_t Value(int i)
+
+    cdef cppclass CInt8Array" arrow::Int8Array"(CArray):
+        int8_t Value(int i)
+
+    cdef cppclass CUInt16Array" arrow::UInt16Array"(CArray):
+        uint16_t Value(int i)
+
+    cdef cppclass CInt16Array" arrow::Int16Array"(CArray):
+        int16_t Value(int i)
+
+    cdef cppclass CUInt32Array" arrow::UInt32Array"(CArray):
+        uint32_t Value(int i)
+
+    cdef cppclass CInt32Array" arrow::Int32Array"(CArray):
+        int32_t Value(int i)
+
+    cdef cppclass CUInt64Array" arrow::UInt64Array"(CArray):
+        uint64_t Value(int i)
+
+    cdef cppclass CInt64Array" arrow::Int64Array"(CArray):
+        int64_t Value(int i)
+
+    cdef cppclass CFloatArray" arrow::FloatArray"(CArray):
+        float Value(int i)
+
+    cdef cppclass CDoubleArray" arrow::DoubleArray"(CArray):
+        double Value(int i)
+
+    cdef cppclass CListArray" arrow::ListArray"(CArray):
+        const int32_t* offsets()
+        int32_t offset(int i)
+        int32_t value_length(int i)
+        const shared_ptr[CArray]& values()
+        const shared_ptr[CDataType]& value_type()
+
+    cdef cppclass CStringArray" arrow::StringArray"(CListArray):
+        c_string GetString(int i)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/parquet.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/parquet.pxd b/python/pyarrow/includes/parquet.pxd
new file mode 100644
index 0000000..99a2d42
--- /dev/null
+++ b/python/pyarrow/includes/parquet.pxd
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+
+cdef extern from "parquet/api/reader.h" namespace "parquet_cpp" nogil:
+    cdef cppclass ColumnReader:
+        pass
+
+    cdef cppclass BoolReader(ColumnReader):
+        pass
+
+    cdef cppclass Int32Reader(ColumnReader):
+        pass
+
+    cdef cppclass Int64Reader(ColumnReader):
+        pass
+
+    cdef cppclass Int96Reader(ColumnReader):
+        pass
+
+    cdef cppclass FloatReader(ColumnReader):
+        pass
+
+    cdef cppclass DoubleReader(ColumnReader):
+        pass
+
+    cdef cppclass ByteArrayReader(ColumnReader):
+        pass
+
+    cdef cppclass RowGroupReader:
+        pass
+
+    cdef cppclass ParquetFileReader:
+        pass

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
new file mode 100644
index 0000000..9a0c004
--- /dev/null
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport (CArray, CDataType, LogicalType,
+                                        MemoryPool)
+
+cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
+    # We can later add more of the common status factory methods as needed
+    cdef Status Status_OK "Status::OK"()
+
+    cdef cppclass Status:
+        Status()
+
+        c_string ToString()
+
+        c_bool ok()
+        c_bool IsOutOfMemory()
+        c_bool IsKeyError()
+        c_bool IsTypeError()
+        c_bool IsIOError()
+        c_bool IsValueError()
+        c_bool IsNotImplemented()
+        c_bool IsArrowError()
+
+    shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
+    Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
+
+    MemoryPool* GetMemoryPool()

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/parquet.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx
new file mode 100644
index 0000000..622e7d0
--- /dev/null
+++ b/python/pyarrow/parquet.pyx
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from pyarrow.compat import frombytes, tobytes
+from pyarrow.includes.parquet cimport *

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/scalar.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pxd b/python/pyarrow/scalar.pxd
new file mode 100644
index 0000000..b068457
--- /dev/null
+++ b/python/pyarrow/scalar.pxd
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+
+from pyarrow.schema cimport DataType
+
+cdef class Scalar:
+    cdef readonly:
+        DataType type
+
+
+cdef class NAType(Scalar):
+    pass
+
+
+cdef class ArrayValue(Scalar):
+    cdef:
+        shared_ptr[CArray] sp_array
+        int index
+
+    cdef void init(self, DataType type,
+                   const shared_ptr[CArray]& sp_array, int index)
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array)
+
+
+cdef class Int8Value(ArrayValue):
+    pass
+
+
+cdef class Int64Value(ArrayValue):
+    pass
+
+
+cdef class ListValue(ArrayValue):
+    cdef readonly:
+        DataType value_type
+
+    cdef:
+        CListArray* ap
+
+    cdef getitem(self, int i)
+
+
+cdef class StringValue(ArrayValue):
+    pass
+
+cdef object box_arrow_scalar(DataType type,
+                             const shared_ptr[CArray]& sp_array,
+                             int index)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
new file mode 100644
index 0000000..261a389
--- /dev/null
+++ b/python/pyarrow/scalar.pyx
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.schema cimport DataType, box_data_type
+
+from pyarrow.compat import frombytes
+import pyarrow.schema as schema
+
+NA = None
+
+cdef class NAType(Scalar):
+
+    def __cinit__(self):
+        global NA
+        if NA is not None:
+            raise Exception('Cannot create multiple NAType instances')
+
+        self.type = schema.null()
+
+    def __repr__(self):
+        return 'NA'
+
+    def as_py(self):
+        return None
+
+NA = NAType()
+
+cdef class ArrayValue(Scalar):
+
+    cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
+                   int index):
+        self.type = type
+        self.index = index
+        self._set_array(sp_array)
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+
+    def __repr__(self):
+        if hasattr(self, 'as_py'):
+            return repr(self.as_py())
+        else:
+            return Scalar.__repr__(self)
+
+
+cdef class BooleanValue(ArrayValue):
+    pass
+
+
+cdef class Int8Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt8Array* ap = <CInt8Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt8Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt8Array* ap = <CUInt8Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int16Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt16Array* ap = <CInt16Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt16Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt16Array* ap = <CUInt16Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int32Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt32Array* ap = <CInt32Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt32Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt32Array* ap = <CUInt32Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int64Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt64Array* ap = <CInt64Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt64Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt64Array* ap = <CUInt64Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class FloatValue(ArrayValue):
+
+    def as_py(self):
+        cdef CFloatArray* ap = <CFloatArray*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class DoubleValue(ArrayValue):
+
+    def as_py(self):
+        cdef CDoubleArray* ap = <CDoubleArray*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class StringValue(ArrayValue):
+
+    def as_py(self):
+        cdef CStringArray* ap = <CStringArray*> self.sp_array.get()
+        return frombytes(ap.GetString(self.index))
+
+
+cdef class ListValue(ArrayValue):
+
+    def __len__(self):
+        return self.ap.value_length(self.index)
+
+    def __getitem__(self, i):
+        return self.getitem(i)
+
+    def __iter__(self):
+        # No explicit raise StopIteration: PEP 479 makes it a RuntimeError.
+        for i in range(len(self)):
+            yield self.getitem(i)
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+        self.ap = <CListArray*> sp_array.get()
+        self.value_type = box_data_type(self.ap.value_type())
+
+    cdef getitem(self, int i):
+        cdef int j = self.ap.offset(self.index) + i
+        return box_arrow_scalar(self.value_type, self.ap.values(), j)
+
+    def as_py(self):
+        cdef:
+            int j
+            list result = []
+
+        for j in range(len(self)):
+            result.append(self.getitem(j).as_py())
+
+        return result
+
+
+cdef dict _scalar_classes = {
+    LogicalType_UINT8: UInt8Value,  # unsigned types use the UInt* boxes
+    LogicalType_UINT16: UInt16Value,
+    LogicalType_UINT32: UInt32Value,
+    LogicalType_UINT64: UInt64Value,
+    LogicalType_INT8: Int8Value,
+    LogicalType_INT16: Int16Value,
+    LogicalType_INT32: Int32Value,
+    LogicalType_INT64: Int64Value,
+    LogicalType_FLOAT: FloatValue,
+    LogicalType_DOUBLE: DoubleValue,
+    LogicalType_LIST: ListValue,
+    LogicalType_STRING: StringValue
+}
+
+cdef object box_arrow_scalar(DataType type,
+                             const shared_ptr[CArray]& sp_array,
+                             int index):
+    cdef ArrayValue val
+    if sp_array.get().IsNull(index):
+        return NA
+    else:
+        val = _scalar_classes[type.type.type]()
+        val.init(type, sp_array, index)
+        return val

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd
new file mode 100644
index 0000000..07b9bd0
--- /dev/null
+++ b/python/pyarrow/schema.pxd
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport shared_ptr
+from pyarrow.includes.libarrow cimport CDataType, CField, CSchema
+
+cdef class DataType:
+    cdef:
+        shared_ptr[CDataType] sp_type
+        CDataType* type
+
+    cdef init(self, const shared_ptr[CDataType]& type)
+
+cdef class Field:
+    cdef:
+        shared_ptr[CField] sp_field
+        CField* field
+
+    cdef readonly:
+        DataType type
+
+cdef class Schema:
+    cdef:
+        shared_ptr[CSchema] sp_schema
+        CSchema* schema
+
+cdef DataType box_data_type(const shared_ptr[CDataType]& type)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
new file mode 100644
index 0000000..ea87872
--- /dev/null
+++ b/python/pyarrow/schema.pyx
@@ -0,0 +1,164 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+########################################
+# Data types, fields, schemas, and so forth
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from pyarrow.compat import frombytes, tobytes
+from pyarrow.includes.libarrow cimport *
+cimport pyarrow.includes.pyarrow as pyarrow
+
+cimport cpython
+
+cdef class DataType:
+
+    def __cinit__(self):
+        pass
+
+    cdef init(self, const shared_ptr[CDataType]& type):
+        self.sp_type = type
+        self.type = type.get()
+
+    def __str__(self):
+        return frombytes(self.type.ToString())
+
+    def __repr__(self):
+        return 'DataType({0})'.format(str(self))
+
+    def __richcmp__(DataType self, DataType other, int op):
+        if op == cpython.Py_EQ:
+            return self.type.Equals(other.type)
+        elif op == cpython.Py_NE:
+            return not self.type.Equals(other.type)
+        else:
+            raise TypeError('Invalid comparison')
+
+
+cdef class Field:
+
+    def __cinit__(self, object name, DataType type):
+        self.type = type
+        self.sp_field.reset(new CField(tobytes(name), type.sp_type))
+        self.field = self.sp_field.get()
+
+    def __repr__(self):
+        return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
+
+    property name:
+
+        def __get__(self):
+            return frombytes(self.field.name)
+
+cdef dict _type_cache = {}
+
+cdef DataType primitive_type(LogicalType type, bint nullable=True):
+    if (type, nullable) in _type_cache:
+        return _type_cache[type, nullable]
+
+    cdef DataType out = DataType()
+    out.init(pyarrow.GetPrimitiveType(type, nullable))
+
+    _type_cache[type, nullable] = out
+    return out
+
+#------------------------------------------------------------
+# Type factory functions
+
+def field(name, type):
+    return Field(name, type)
+
+cdef set PRIMITIVE_TYPES = set([
+    LogicalType_NA, LogicalType_BOOL,
+    LogicalType_UINT8, LogicalType_INT8,
+    LogicalType_UINT16, LogicalType_INT16,
+    LogicalType_UINT32, LogicalType_INT32,
+    LogicalType_UINT64, LogicalType_INT64,
+    LogicalType_FLOAT, LogicalType_DOUBLE])
+
+def null():
+    return primitive_type(LogicalType_NA)
+
+def bool_(c_bool nullable=True):
+    return primitive_type(LogicalType_BOOL, nullable)
+
+def uint8(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT8, nullable)
+
+def int8(c_bool nullable=True):
+    return primitive_type(LogicalType_INT8, nullable)
+
+def uint16(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT16, nullable)
+
+def int16(c_bool nullable=True):
+    return primitive_type(LogicalType_INT16, nullable)
+
+def uint32(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT32, nullable)
+
+def int32(c_bool nullable=True):
+    return primitive_type(LogicalType_INT32, nullable)
+
+def uint64(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT64, nullable)
+
+def int64(c_bool nullable=True):
+    return primitive_type(LogicalType_INT64, nullable)
+
+def float_(c_bool nullable=True):
+    return primitive_type(LogicalType_FLOAT, nullable)
+
+def double(c_bool nullable=True):
+    return primitive_type(LogicalType_DOUBLE, nullable)
+
+def string(c_bool nullable=True):
+    """
+    UTF8 string
+    """
+    return primitive_type(LogicalType_STRING, nullable)
+
+def list_(DataType value_type, c_bool nullable=True):
+    cdef DataType out = DataType()
+    out.init(shared_ptr[CDataType](
+        new CListType(value_type.sp_type, nullable)))
+    return out
+
+def struct(fields, c_bool nullable=True):
+    """
+    Build a struct DataType from an iterable of Field objects.
+    """
+    cdef:
+        DataType out = DataType()
+        Field field
+        vector[shared_ptr[CField]] c_fields
+
+    for field in fields:
+        c_fields.push_back(field.sp_field)
+
+    out.init(shared_ptr[CDataType](
+        new CStructType(c_fields, nullable)))
+    return out
+
+
+cdef DataType box_data_type(const shared_ptr[CDataType]& type):
+    cdef DataType out = DataType()
+    out.init(type)
+    return out

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/__init__.py b/python/pyarrow/tests/__init__.py
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_array.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
new file mode 100644
index 0000000..034c157
--- /dev/null
+++ b/python/pyarrow/tests/test_array.py
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow
+import pyarrow.formatting as fmt
+
+
+class TestArrayAPI(unittest.TestCase):
+
+    def test_getitem_NA(self):
+        arr = pyarrow.from_pylist([1, None, 2])
+        assert arr[1] is pyarrow.NA
+
+    def test_list_format(self):
+        arr = pyarrow.from_pylist([[1], None, [2, 3]])
+        result = fmt.array_format(arr)
+        expected = """\
+[
+  [1],
+  NA,
+  [2,
+   3]
+]"""
+        assert result == expected
+
+    def test_string_format(self):
+        arr = pyarrow.from_pylist(['foo', None, 'bar'])
+        result = fmt.array_format(arr)
+        expected = """\
+[
+  'foo',
+  NA,
+  'bar'
+]"""
+        assert result == expected
+
+    def test_long_array_format(self):
+        arr = pyarrow.from_pylist(range(100))
+        result = fmt.array_format(arr, window=2)
+        expected = """\
+[
+  0,
+  1,
+  ...
+  98,
+  99
+]"""
+        assert result == expected

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
new file mode 100644
index 0000000..25f6969
--- /dev/null
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow
+
+
+class TestConvertList(unittest.TestCase):
+
+    def test_boolean(self):
+        pass
+
+    def test_empty_list(self):
+        arr = pyarrow.from_pylist([])
+        assert len(arr) == 0
+        assert arr.null_count == 0
+        assert arr.type == pyarrow.null()
+
+    def test_all_none(self):
+        arr = pyarrow.from_pylist([None, None])
+        assert len(arr) == 2
+        assert arr.null_count == 2
+        assert arr.type == pyarrow.null()
+
+    def test_integer(self):
+        arr = pyarrow.from_pylist([1, None, 3, None])
+        assert len(arr) == 4
+        assert arr.null_count == 2
+        assert arr.type == pyarrow.int64()
+
+    def test_garbage_collection(self):
+        import gc
+        bytes_before = pyarrow.total_allocated_bytes()
+        pyarrow.from_pylist([1, None, 3, None])
+        gc.collect()
+        assert pyarrow.total_allocated_bytes() == bytes_before
+
+    def test_double(self):
+        data = [1.5, 1, None, 2.5, None, None]
+        arr = pyarrow.from_pylist(data)
+        assert len(arr) == 6
+        assert arr.null_count == 3
+        assert arr.type == pyarrow.double()
+
+    def test_string(self):
+        data = ['foo', b'bar', None, 'arrow']
+        arr = pyarrow.from_pylist(data)
+        assert len(arr) == 4
+        assert arr.null_count == 1
+        assert arr.type == pyarrow.string()
+
+    def test_mixed_nesting_levels(self):
+        pyarrow.from_pylist([1, 2, None])
+        pyarrow.from_pylist([[1], [2], None])
+        pyarrow.from_pylist([[1], [2], [None]])
+
+        with self.assertRaises(pyarrow.ArrowException):
+            pyarrow.from_pylist([1, 2, [1]])
+
+        with self.assertRaises(pyarrow.ArrowException):
+            pyarrow.from_pylist([1, 2, []])
+
+        with self.assertRaises(pyarrow.ArrowException):
+            pyarrow.from_pylist([[1], [2], [None, [1]]])
+
+    def test_list_of_int(self):
+        data = [[1, 2, 3], [], None, [1, 2]]
+        arr = pyarrow.from_pylist(data)
+        assert len(arr) == 4
+        assert arr.null_count == 1
+        assert arr.type == pyarrow.list_(pyarrow.int64())

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
new file mode 100644
index 0000000..021737d
--- /dev/null
+++ b/python/pyarrow/tests/test_scalars.py
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest, u
+import pyarrow as arrow
+
+
+class TestScalars(unittest.TestCase):
+
+    def test_null_singleton(self):
+        with self.assertRaises(Exception):
+            arrow.NAType()
+
+    def test_bool(self):
+        pass
+
+    def test_int64(self):
+        arr = arrow.from_pylist([1, 2, None])
+
+        v = arr[0]
+        assert isinstance(v, arrow.Int64Value)
+        assert repr(v) == "1"
+        assert v.as_py() == 1
+
+        assert arr[2] is arrow.NA
+
+    def test_double(self):
+        arr = arrow.from_pylist([1.5, None, 3])
+
+        v = arr[0]
+        assert isinstance(v, arrow.DoubleValue)
+        assert repr(v) == "1.5"
+        assert v.as_py() == 1.5
+
+        assert arr[1] is arrow.NA
+
+        v = arr[2]
+        assert v.as_py() == 3.0
+
+    def test_string(self):
+        arr = arrow.from_pylist(['foo', None, u('bar')])
+
+        v = arr[0]
+        assert isinstance(v, arrow.StringValue)
+        assert repr(v) == "'foo'"
+        assert v.as_py() == 'foo'
+
+        assert arr[1] is arrow.NA
+
+        v = arr[2].as_py()
+        assert v == 'bar'
+        assert isinstance(v, str)
+
+    def test_list(self):
+        arr = arrow.from_pylist([['foo', None], None, ['bar'], []])
+
+        v = arr[0]
+        assert len(v) == 2
+        assert isinstance(v, arrow.ListValue)
+        assert repr(v) == "['foo', None]"
+        assert v.as_py() == ['foo', None]
+        assert v[0].as_py() == 'foo'
+        assert v[1] is arrow.NA
+
+        assert arr[1] is arrow.NA
+
+        v = arr[3]
+        assert len(v) == 0

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
new file mode 100644
index 0000000..0235526
--- /dev/null
+++ b/python/pyarrow/tests/test_schema.py
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow as arrow
+
+
+class TestTypes(unittest.TestCase):
+
+    def test_integers(self):
+        dtypes = ['int8', 'int16', 'int32', 'int64',
+                  'uint8', 'uint16', 'uint32', 'uint64']
+
+        for name in dtypes:
+            factory = getattr(arrow, name)
+            t = factory()
+            t_required = factory(False)
+
+            assert str(t) == name
+            assert str(t_required) == '{0} not null'.format(name)
+
+    def test_list(self):
+        value_type = arrow.int32()
+        list_type = arrow.list_(value_type)
+        assert str(list_type) == 'list<int32>'
+
+    def test_string(self):
+        t = arrow.string()
+        assert str(t) == 'string'
+
+    def test_field(self):
+        t = arrow.string()
+        f = arrow.field('foo', t)
+
+        assert f.name == 'foo'
+        assert f.type is t
+        assert repr(f) == "Field('foo', type=string)"

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/requirements.txt
----------------------------------------------------------------------
diff --git a/python/requirements.txt b/python/requirements.txt
index a82cb20..f42c90c 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,4 +1,3 @@
 pytest
 numpy>=1.7.0
-pandas>=0.12.0
 six

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index eb3ff2a..5cc871a 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -27,7 +27,7 @@ import Cython
 import sys
 
 import pkg_resources
-from setuptools import setup
+from setuptools import setup, Extension
 
 import os
 
@@ -40,10 +40,12 @@ from distutils import sysconfig
 is_64_bit = sys.maxsize > 2**32
 
 # Check if this is a debug build of Python.
-if hasattr(sys, 'gettotalrefcount'):
-    build_type = 'Debug'
-else:
-    build_type = 'Release'
+# if hasattr(sys, 'gettotalrefcount'):
+#     build_type = 'Debug'
+# else:
+#     build_type = 'Release'
+
+build_type = 'Debug'
 
 if Cython.__version__ < '0.19.1':
     raise Exception('Please upgrade to Cython 0.19.1 or newer')
@@ -51,7 +53,7 @@ if Cython.__version__ < '0.19.1':
 MAJOR = 0
 MINOR = 1
 MICRO = 0
-VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
+VERSION = '%d.%d.%ddev' % (MAJOR, MINOR, MICRO)
 
 
 class clean(_clean):
@@ -70,6 +72,9 @@ class build_ext(_build_ext):
     def build_extensions(self):
         numpy_incl = pkg_resources.resource_filename('numpy', 'core/include')
 
+        self.extensions = [ext for ext in self.extensions
+                           if ext.name != '__dummy__']
+
         for ext in self.extensions:
             if (hasattr(ext, 'include_dirs') and
                     numpy_incl not in ext.include_dirs):
@@ -98,6 +103,7 @@ class build_ext(_build_ext):
 
         # The staging directory for the module being built
         build_temp = pjoin(os.getcwd(), self.build_temp)
+        build_lib = os.path.join(os.getcwd(), self.build_lib)
 
         # Change to the build directory
         saved_cwd = os.getcwd()
@@ -124,7 +130,7 @@ class build_ext(_build_ext):
                              static_lib_option, source]
 
             self.spawn(cmake_command)
-            args = ['make']
+            args = ['make', 'VERBOSE=1']
             if 'PYARROW_PARALLEL' in os.environ:
                 args.append('-j{0}'.format(os.environ['PYARROW_PARALLEL']))
             self.spawn(args)
@@ -150,21 +156,19 @@ class build_ext(_build_ext):
         if self.inplace:
             # a bit hacky
             build_lib = saved_cwd
-        else:
-            build_lib = pjoin(os.getcwd(), self.build_lib)
 
         # Move the built libpyarrow library to the place expected by the Python
         # build
         if sys.platform != 'win32':
             name, = glob.glob('libpyarrow.*')
             try:
-                os.makedirs(pjoin(build_lib, 'arrow'))
+                os.makedirs(pjoin(build_lib, 'pyarrow'))
             except OSError:
                 pass
-            shutil.move(name, pjoin(build_lib, 'arrow', name))
+            shutil.move(name, pjoin(build_lib, 'pyarrow', name))
         else:
             shutil.move(pjoin(build_type, 'pyarrow.dll'),
-                        pjoin(build_lib, 'arrow', 'pyarrow.dll'))
+                        pjoin(build_lib, 'pyarrow', 'pyarrow.dll'))
 
         # Move the built C-extension to the place expected by the Python build
         self._found_names = []
@@ -192,7 +196,7 @@ class build_ext(_build_ext):
     def _get_cmake_ext_path(self, name):
         # Get the package directory from build_py
         build_py = self.get_finalized_command('build_py')
-        package_dir = build_py.get_package_dir('arrow')
+        package_dir = build_py.get_package_dir('pyarrow')
         # This is the name of the arrow C-extension
         suffix = sysconfig.get_config_var('EXT_SUFFIX')
         if suffix is None:
@@ -217,23 +221,23 @@ class build_ext(_build_ext):
 
     def get_outputs(self):
         # Just the C extensions
-        cmake_exts = [self._get_cmake_ext_path(name)
-                      for name in self.get_names()]
-        regular_exts = _build_ext.get_outputs(self)
-        return regular_exts + cmake_exts
+        # regular_exts = _build_ext.get_outputs(self)
+        return [self._get_cmake_ext_path(name)
+                for name in self.get_names()]
 
 
-extensions = []
-
 DESC = """\
 Python library for Apache Arrow"""
 
 setup(
-    name="arrow",
-    packages=['arrow', 'arrow.tests'],
+    name="pyarrow",
+    packages=['pyarrow', 'pyarrow.tests'],
     version=VERSION,
-    package_data={'arrow': ['*.pxd', '*.pyx']},
-    ext_modules=extensions,
+    zip_safe=False,
+    package_data={'pyarrow': ['*.pxd', '*.pyx']},
+    # Dummy extension to trigger build_ext
+    ext_modules=[Extension('__dummy__', sources=[])],
+
     cmdclass={
         'clean': clean,
         'build_ext': build_ext
@@ -243,5 +247,5 @@ setup(
     license='Apache License, Version 2.0',
     maintainer="Apache Arrow Developers",
     maintainer_email="dev@arrow.apache.org",
-    test_suite="arrow.tests"
+    test_suite="pyarrow.tests"
 )

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/src/pyarrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/util/CMakeLists.txt b/python/src/pyarrow/util/CMakeLists.txt
index 3fd8bac..4afb4d0 100644
--- a/python/src/pyarrow/util/CMakeLists.txt
+++ b/python/src/pyarrow/util/CMakeLists.txt
@@ -19,19 +19,21 @@
 # pyarrow_test_main
 #######################################
 
-add_library(pyarrow_test_main
-  test_main.cc)
+if (PYARROW_BUILD_TESTS)
+  add_library(pyarrow_test_main
+	test_main.cc)
 
-if (APPLE)
-  target_link_libraries(pyarrow_test_main
-    gtest
-    dl)
-  set_target_properties(pyarrow_test_main
-        PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-else()
-  target_link_libraries(pyarrow_test_main
-    gtest
-    pthread
-    dl
-  )
+  if (APPLE)
+	target_link_libraries(pyarrow_test_main
+      gtest
+      dl)
+	set_target_properties(pyarrow_test_main
+      PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+  else()
+	target_link_libraries(pyarrow_test_main
+      gtest
+      pthread
+      dl
+	  )
+  endif()
 endif()


[2/2] arrow git commit: ARROW-54: [Python] Rename package to "pyarrow"

Posted by we...@apache.org.
ARROW-54: [Python] Rename package to "pyarrow"

Also fixed rpath issues (at great cost) per ARROW-53

Author: Wes McKinney <we...@apache.org>

Closes #23 from wesm/ARROW-54 and squashes the following commits:

b8ce0e8 [Wes McKinney] Update installation instructions
cae9b39 [Wes McKinney] Fix rpath issues per ARROW-53
7554539 [Wes McKinney] Twiddle rpath stuff, remove empty arrow_test_util module
8cca41a [Wes McKinney] Fix Travis CI script for renamed package
1d37c93 [Wes McKinney] Opt in to building unit tests
60088d0 [Wes McKinney] Rename package to pyarrow
e3d0caf [Wes McKinney] Note on other Python interpreters
80d3bac [Wes McKinney] Start installation document


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/6fdcd494
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/6fdcd494
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/6fdcd494

Branch: refs/heads/master
Commit: 6fdcd4943ff9a8cc66afbee380217cec40c0cda0
Parents: 8367527
Author: Wes McKinney <we...@apache.org>
Authored: Wed Mar 9 15:45:05 2016 -0800
Committer: Wes McKinney <we...@apache.org>
Committed: Wed Mar 9 15:45:05 2016 -0800

----------------------------------------------------------------------
 .travis.yml                                  |   4 +-
 ci/travis_script_python.sh                   |   2 +-
 cpp/CMakeLists.txt                           |  29 ++--
 cpp/src/arrow/CMakeLists.txt                 |   2 +-
 cpp/src/arrow/util/CMakeLists.txt            |  44 ++---
 python/CMakeLists.txt                        |  31 ++--
 python/arrow/__init__.py                     |  38 -----
 python/arrow/array.pxd                       |  85 ----------
 python/arrow/array.pyx                       | 192 ---------------------
 python/arrow/compat.py                       |  92 ----------
 python/arrow/config.pyx                      |   8 -
 python/arrow/error.pxd                       |  20 ---
 python/arrow/error.pyx                       |  30 ----
 python/arrow/formatting.py                   |  56 ------
 python/arrow/includes/__init__.pxd           |   0
 python/arrow/includes/arrow.pxd              | 124 --------------
 python/arrow/includes/common.pxd             |  36 ----
 python/arrow/includes/parquet.pxd            |  51 ------
 python/arrow/includes/pyarrow.pxd            |  45 -----
 python/arrow/parquet.pyx                     |  23 ---
 python/arrow/scalar.pxd                      |  66 --------
 python/arrow/scalar.pyx                      | 198 ----------------------
 python/arrow/schema.pxd                      |  41 -----
 python/arrow/schema.pyx                      | 164 ------------------
 python/arrow/tests/__init__.py               |   0
 python/arrow/tests/test_array.py             |  63 -------
 python/arrow/tests/test_convert_builtin.py   |  85 ----------
 python/arrow/tests/test_scalars.py           |  82 ---------
 python/arrow/tests/test_schema.py            |  51 ------
 python/doc/INSTALL.md                        |  87 ++++++++++
 python/pyarrow/__init__.py                   |  38 +++++
 python/pyarrow/array.pxd                     |  85 ++++++++++
 python/pyarrow/array.pyx                     | 192 +++++++++++++++++++++
 python/pyarrow/compat.py                     |  92 ++++++++++
 python/pyarrow/config.pyx                    |   8 +
 python/pyarrow/error.pxd                     |  20 +++
 python/pyarrow/error.pyx                     |  29 ++++
 python/pyarrow/formatting.py                 |  56 ++++++
 python/pyarrow/includes/__init__.pxd         |   0
 python/pyarrow/includes/common.pxd           |  36 ++++
 python/pyarrow/includes/libarrow.pxd         | 124 ++++++++++++++
 python/pyarrow/includes/parquet.pxd          |  51 ++++++
 python/pyarrow/includes/pyarrow.pxd          |  45 +++++
 python/pyarrow/parquet.pyx                   |  23 +++
 python/pyarrow/scalar.pxd                    |  66 ++++++++
 python/pyarrow/scalar.pyx                    | 198 ++++++++++++++++++++++
 python/pyarrow/schema.pxd                    |  41 +++++
 python/pyarrow/schema.pyx                    | 164 ++++++++++++++++++
 python/pyarrow/tests/__init__.py             |   0
 python/pyarrow/tests/test_array.py           |  63 +++++++
 python/pyarrow/tests/test_convert_builtin.py |  85 ++++++++++
 python/pyarrow/tests/test_scalars.py         |  82 +++++++++
 python/pyarrow/tests/test_schema.py          |  51 ++++++
 python/requirements.txt                      |   1 -
 python/setup.py                              |  52 +++---
 python/src/pyarrow/util/CMakeLists.txt       |  30 ++--
 56 files changed, 1740 insertions(+), 1641 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 9e858d7..49a956e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -27,7 +27,8 @@ matrix:
     - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
     - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh
   - compiler: clang
-    language: cpp
+    language: objective-c
+    osx_image: xcode6.4
     os: osx
     addons:
     before_script:
@@ -40,7 +41,6 @@ before_install:
 - ulimit -c unlimited -S
 - export CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build
 - export ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install
-- export LD_LIBRARY_PATH=$ARROW_CPP_INSTALL/lib:$LD_LIBRARY_PATH
 
 after_script:
 - rm -rf $CPP_BUILD_DIR

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/ci/travis_script_python.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 9b0bd4f..14d66b4 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -48,7 +48,7 @@ export ARROW_HOME=$ARROW_CPP_INSTALL
 
 python setup.py build_ext --inplace
 
-py.test -vv -r sxX arrow
+py.test -vv -r sxX pyarrow
 
 # if [ $TRAVIS_OS_NAME == "linux" ]; then
 #   valgrind --tool=memcheck py.test -vv -r sxX arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index e8cb88c..f5f6038 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -37,11 +37,6 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
   set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
 endif()
 
-if(APPLE)
-  # In newer versions of CMake, this is the default setting
-  set(CMAKE_MACOSX_RPATH 1)
-endif()
-
 find_program(CCACHE_FOUND ccache)
 if(CCACHE_FOUND)
   set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
@@ -339,10 +334,13 @@ endfunction()
 if ("$ENV{GTEST_HOME}" STREQUAL "")
   set(GTest_HOME ${THIRDPARTY_DIR}/googletest-release-1.7.0)
 endif()
-find_package(GTest REQUIRED)
-include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
-ADD_THIRDPARTY_LIB(gtest
-  STATIC_LIB ${GTEST_STATIC_LIB})
+
+if(ARROW_BUILD_TESTS)
+  find_package(GTest REQUIRED)
+  include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(gtest
+    STATIC_LIB ${GTEST_STATIC_LIB})
+endif()
 
 ## Google PerfTools
 ##
@@ -366,7 +364,7 @@ ADD_THIRDPARTY_LIB(gtest
 ############################################################
 # Linker setup
 ############################################################
-set(ARROW_MIN_TEST_LIBS arrow arrow_test_main arrow_test_util ${ARROW_BASE_LIBS})
+set(ARROW_MIN_TEST_LIBS arrow arrow_test_main ${ARROW_BASE_LIBS})
 set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
 
 ############################################################
@@ -461,9 +459,18 @@ add_library(arrow
   ${LIBARROW_LINKAGE}
   ${ARROW_SRCS}
 )
+
+if (APPLE)
+  set_target_properties(arrow
+    PROPERTIES
+    BUILD_WITH_INSTALL_RPATH ON
+    INSTALL_NAME_DIR "@rpath")
+endif()
+
 set_target_properties(arrow
   PROPERTIES
-  LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+  LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}"
+)
 target_link_libraries(arrow ${LIBARROW_LINK_LIBS})
 
 add_subdirectory(src/arrow)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 77326ce..73e6a9b 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -27,6 +27,6 @@ install(FILES
 # Unit tests
 #######################################
 
-set(ARROW_TEST_LINK_LIBS arrow_test_util ${ARROW_MIN_TEST_LIBS})
+set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
 
 ADD_ARROW_TEST(array-test)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 4272ce4..d8e2f98 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -29,36 +29,26 @@ install(FILES
   DESTINATION include/arrow/util)
 
 #######################################
-# arrow_test_util
-#######################################
-
-add_library(arrow_test_util)
-target_link_libraries(arrow_test_util
-)
-
-SET_TARGET_PROPERTIES(arrow_test_util PROPERTIES LINKER_LANGUAGE CXX)
-
-#######################################
 # arrow_test_main
 #######################################
 
-add_library(arrow_test_main
-  test_main.cc)
-
-if (APPLE)
-  target_link_libraries(arrow_test_main
-    gtest
-	arrow_test_util
-    dl)
-  set_target_properties(arrow_test_main
-        PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-else()
-  target_link_libraries(arrow_test_main
-    gtest
-	arrow_test_util
-    pthread
-    dl
-  )
+if (ARROW_BUILD_TESTS)
+  add_library(arrow_test_main
+	test_main.cc)
+
+  if (APPLE)
+	target_link_libraries(arrow_test_main
+      gtest
+      dl)
+	set_target_properties(arrow_test_main
+      PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+  else()
+	target_link_libraries(arrow_test_main
+      gtest
+      pthread
+      dl
+	  )
+  endif()
 endif()
 
 ADD_ARROW_TEST(bit-util-test)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 8f5c27b..0ecafc7 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -45,6 +45,13 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
   set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
 endif()
 
+# Top level cmake dir
+if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
+  option(PYARROW_BUILD_TESTS
+    "Build the PyArrow C++ googletest unit tests"
+    OFF)
+endif()
+
 find_program(CCACHE_FOUND ccache)
 if(CCACHE_FOUND)
   set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
@@ -322,10 +329,12 @@ function(ADD_THIRDPARTY_LIB LIB_NAME)
 endfunction()
 
 ## GMock
-find_package(GTest REQUIRED)
-include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
-ADD_THIRDPARTY_LIB(gtest
-  STATIC_LIB ${GTEST_STATIC_LIB})
+if (PYARROW_BUILD_TESTS)
+  find_package(GTest REQUIRED)
+  include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(gtest
+    STATIC_LIB ${GTEST_STATIC_LIB})
+endif()
 
 ## Arrow
 find_package(Arrow REQUIRED)
@@ -391,6 +400,10 @@ endif (UNIX)
 # Subdirectories
 ############################################################
 
+if (UNIX)
+  set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
+endif()
+
 add_subdirectory(src/pyarrow)
 add_subdirectory(src/pyarrow/util)
 
@@ -407,10 +420,11 @@ set(LINK_LIBS
   arrow
 )
 
+SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+
 add_library(pyarrow SHARED
   ${PYARROW_SRCS})
 target_link_libraries(pyarrow ${LINK_LIBS})
-set_target_properties(pyarrow PROPERTIES LINKER_LANGUAGE CXX)
 
 if(APPLE)
   set_target_properties(pyarrow PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
@@ -420,9 +434,6 @@ endif()
 # Setup and build Cython modules
 ############################################################
 
-set(USE_RELATIVE_RPATH ON)
-set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
-
 set(CYTHON_EXTENSIONS
   array
   config
@@ -437,7 +448,7 @@ foreach(module ${CYTHON_EXTENSIONS})
     list(REMOVE_AT directories -1)
 
     string(REPLACE "." "/" module_root "${module}")
-    set(module_SRC arrow/${module_root}.pyx)
+    set(module_SRC pyarrow/${module_root}.pyx)
     set_source_files_properties(${module_SRC} PROPERTIES CYTHON_IS_CXX 1)
 
     cython_add_module(${module_name}
@@ -463,7 +474,7 @@ foreach(module ${CYTHON_EXTENSIONS})
     endwhile(${i} GREATER 0)
 
     # for inplace development for now
-    set(module_install_rpath "${CMAKE_SOURCE_DIR}/arrow/")
+    #set(module_install_rpath "${CMAKE_SOURCE_DIR}/pyarrow/")
 
     set_target_properties(${module_name} PROPERTIES
       INSTALL_RPATH ${module_install_rpath})

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/arrow/__init__.py b/python/arrow/__init__.py
deleted file mode 100644
index 3507ea0..0000000
--- a/python/arrow/__init__.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# flake8: noqa
-
-from arrow.array import (Array, from_pylist, total_allocated_bytes,
-                         BooleanArray, NumericArray,
-                         Int8Array, UInt8Array,
-                         ListArray, StringArray)
-
-from arrow.error import ArrowException
-
-from arrow.scalar import (ArrayValue, Scalar, NA, NAType,
-                          BooleanValue,
-                          Int8Value, Int16Value, Int32Value, Int64Value,
-                          UInt8Value, UInt16Value, UInt32Value, UInt64Value,
-                          FloatValue, DoubleValue, ListValue, StringValue)
-
-from arrow.schema import (null, bool_,
-                          int8, int16, int32, int64,
-                          uint8, uint16, uint32, uint64,
-                          float_, double, string,
-                          list_, struct, field,
-                          DataType, Field, Schema)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/array.pxd b/python/arrow/array.pxd
deleted file mode 100644
index 482f8f7..0000000
--- a/python/arrow/array.pxd
+++ /dev/null
@@ -1,85 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.common cimport shared_ptr
-from arrow.includes.arrow cimport CArray, LogicalType
-
-from arrow.scalar import NA
-
-from arrow.schema cimport DataType
-
-cdef extern from "Python.h":
-    int PySlice_Check(object)
-
-cdef class Array:
-    cdef:
-        shared_ptr[CArray] sp_array
-        CArray* ap
-
-    cdef readonly:
-        DataType type
-
-    cdef init(self, const shared_ptr[CArray]& sp_array)
-    cdef getitem(self, int i)
-
-
-cdef class BooleanArray(Array):
-    pass
-
-
-cdef class NumericArray(Array):
-    pass
-
-
-cdef class Int8Array(NumericArray):
-    pass
-
-
-cdef class UInt8Array(NumericArray):
-    pass
-
-
-cdef class Int16Array(NumericArray):
-    pass
-
-
-cdef class UInt16Array(NumericArray):
-    pass
-
-
-cdef class Int32Array(NumericArray):
-    pass
-
-
-cdef class UInt32Array(NumericArray):
-    pass
-
-
-cdef class Int64Array(NumericArray):
-    pass
-
-
-cdef class UInt64Array(NumericArray):
-    pass
-
-
-cdef class ListArray(Array):
-    pass
-
-
-cdef class StringArray(Array):
-    pass

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/array.pyx b/python/arrow/array.pyx
deleted file mode 100644
index b367e3b..0000000
--- a/python/arrow/array.pyx
+++ /dev/null
@@ -1,192 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from arrow.includes.arrow cimport *
-cimport arrow.includes.pyarrow as pyarrow
-
-from arrow.compat import frombytes, tobytes
-from arrow.error cimport check_status
-
-cimport arrow.scalar as scalar
-from arrow.scalar import NA
-
-def total_allocated_bytes():
-    cdef MemoryPool* pool = pyarrow.GetMemoryPool()
-    return pool.bytes_allocated()
-
-
-cdef class Array:
-
-    cdef init(self, const shared_ptr[CArray]& sp_array):
-        self.sp_array = sp_array
-        self.ap = sp_array.get()
-        self.type = DataType()
-        self.type.init(self.sp_array.get().type())
-
-    property null_count:
-
-        def __get__(self):
-            return self.sp_array.get().null_count()
-
-    def __iter__(self):
-        for i in range(len(self)):
-            yield self.getitem(i)
-        raise StopIteration
-
-    def __repr__(self):
-        from arrow.formatting import array_format
-        type_format = object.__repr__(self)
-        values = array_format(self, window=10)
-        return '{0}\n{1}'.format(type_format, values)
-
-    def __len__(self):
-        return self.sp_array.get().length()
-
-    def isnull(self):
-        raise NotImplemented
-
-    def __getitem__(self, key):
-        cdef:
-            Py_ssize_t n = len(self)
-
-        if PySlice_Check(key):
-            start = key.start or 0
-            while start < 0:
-                start += n
-
-            stop = key.stop if key.stop is not None else n
-            while stop < 0:
-                stop += n
-
-            step = key.step or 1
-            if step != 1:
-                raise NotImplementedError
-            else:
-                return self.slice(start, stop)
-
-        while key < 0:
-            key += len(self)
-
-        return self.getitem(key)
-
-    cdef getitem(self, int i):
-        return scalar.box_arrow_scalar(self.type, self.sp_array, i)
-
-    def slice(self, start, end):
-        pass
-
-
-cdef class NullArray(Array):
-    pass
-
-
-cdef class BooleanArray(Array):
-    pass
-
-
-cdef class NumericArray(Array):
-    pass
-
-
-cdef class Int8Array(NumericArray):
-    pass
-
-
-cdef class UInt8Array(NumericArray):
-    pass
-
-
-cdef class Int16Array(NumericArray):
-    pass
-
-
-cdef class UInt16Array(NumericArray):
-    pass
-
-
-cdef class Int32Array(NumericArray):
-    pass
-
-
-cdef class UInt32Array(NumericArray):
-    pass
-
-
-cdef class Int64Array(NumericArray):
-    pass
-
-
-cdef class UInt64Array(NumericArray):
-    pass
-
-
-cdef class FloatArray(NumericArray):
-    pass
-
-
-cdef class DoubleArray(NumericArray):
-    pass
-
-
-cdef class ListArray(Array):
-    pass
-
-
-cdef class StringArray(Array):
-    pass
-
-
-cdef dict _array_classes = {
-    LogicalType_NA: NullArray,
-    LogicalType_BOOL: BooleanArray,
-    LogicalType_INT64: Int64Array,
-    LogicalType_DOUBLE: DoubleArray,
-    LogicalType_LIST: ListArray,
-    LogicalType_STRING: StringArray,
-}
-
-cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
-    if sp_array.get() == NULL:
-        raise ValueError('Array was NULL')
-
-    cdef CDataType* data_type = sp_array.get().type().get()
-
-    if data_type == NULL:
-        raise ValueError('Array data type was NULL')
-
-    cdef Array arr = _array_classes[data_type.type]()
-    arr.init(sp_array)
-    return arr
-
-
-def from_pylist(object list_obj, DataType type=None):
-    """
-    Convert Python list to Arrow array
-    """
-    cdef:
-        shared_ptr[CArray] sp_array
-
-    if type is None:
-        check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
-    else:
-        raise NotImplementedError
-
-    return box_arrow_array(sp_array)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/compat.py
----------------------------------------------------------------------
diff --git a/python/arrow/compat.py b/python/arrow/compat.py
deleted file mode 100644
index 08f0f23..0000000
--- a/python/arrow/compat.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# flake8: noqa
-
-import itertools
-
-import numpy as np
-
-import sys
-import six
-from six import BytesIO, StringIO, string_types as py_string
-
-
-PY26 = sys.version_info[:2] == (2, 6)
-PY2 = sys.version_info[0] == 2
-
-
-if PY26:
-    import unittest2 as unittest
-else:
-    import unittest
-
-
-if PY2:
-    import cPickle
-
-    try:
-        from cdecimal import Decimal
-    except ImportError:
-        from decimal import Decimal
-
-    unicode_type = unicode
-    lzip = zip
-    zip = itertools.izip
-
-    def dict_values(x):
-        return x.values()
-
-    range = xrange
-    long = long
-
-    def u(s):
-        return unicode(s, "unicode_escape")
-
-    def tobytes(o):
-        if isinstance(o, unicode):
-            return o.encode('utf8')
-        else:
-            return o
-
-    def frombytes(o):
-        return o
-else:
-    unicode_type = str
-    def lzip(*x):
-        return list(zip(*x))
-    long = int
-    zip = zip
-    def dict_values(x):
-        return list(x.values())
-    from decimal import Decimal
-    range = range
-
-    def u(s):
-        return s
-
-    def tobytes(o):
-        if isinstance(o, str):
-            return o.encode('utf8')
-        else:
-            return o
-
-    def frombytes(o):
-        return o.decode('utf8')
-
-
-integer_types = six.integer_types + (np.integer,)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/config.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/config.pyx b/python/arrow/config.pyx
deleted file mode 100644
index 521bc06..0000000
--- a/python/arrow/config.pyx
+++ /dev/null
@@ -1,8 +0,0 @@
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-cdef extern from 'pyarrow/init.h' namespace 'pyarrow':
-    void pyarrow_init()
-
-pyarrow_init()

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/error.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/error.pxd b/python/arrow/error.pxd
deleted file mode 100644
index c18cb3e..0000000
--- a/python/arrow/error.pxd
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.pyarrow cimport *
-
-cdef check_status(const Status& status)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/error.pyx b/python/arrow/error.pyx
deleted file mode 100644
index f1d5163..0000000
--- a/python/arrow/error.pyx
+++ /dev/null
@@ -1,30 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.common cimport c_string
-
-from arrow.compat import frombytes
-
-class ArrowException(Exception):
-    pass
-
-cdef check_status(const Status& status):
-    if status.ok():
-        return
-
-    cdef c_string c_message = status.ToString()
-    raise ArrowException(frombytes(c_message))

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/formatting.py
----------------------------------------------------------------------
diff --git a/python/arrow/formatting.py b/python/arrow/formatting.py
deleted file mode 100644
index a42d4e4..0000000
--- a/python/arrow/formatting.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Pretty-printing and other formatting utilities for Arrow data structures
-
-import arrow.scalar as scalar
-
-
-def array_format(arr, window=None):
-    values = []
-
-    if window is None or window * 2 >= len(arr):
-        for x in arr:
-            values.append(value_format(x, 0))
-        contents = _indent(',\n'.join(values), 2)
-    else:
-        for i in range(window):
-            values.append(value_format(arr[i], 0) + ',')
-        values.append('...')
-        for i in range(len(arr) - window, len(arr)):
-            formatted = value_format(arr[i], 0)
-            if i < len(arr) - 1:
-                formatted += ','
-            values.append(formatted)
-        contents = _indent('\n'.join(values), 2)
-
-    return '[\n{0}\n]'.format(contents)
-
-
-def value_format(x, indent_level=0):
-    if isinstance(x, scalar.ListValue):
-        contents = ',\n'.join(value_format(item) for item in x)
-        return '[{0}]'.format(_indent(contents, 1).strip())
-    else:
-        return repr(x)
-
-
-def _indent(text, spaces):
-    if spaces == 0:
-        return text
-    block = ' ' * spaces
-    return '\n'.join(block + x for x in text.split('\n'))

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/__init__.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/__init__.pxd b/python/arrow/includes/__init__.pxd
deleted file mode 100644
index e69de29..0000000

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/arrow.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/arrow.pxd b/python/arrow/includes/arrow.pxd
deleted file mode 100644
index 0cc44c0..0000000
--- a/python/arrow/includes/arrow.pxd
+++ /dev/null
@@ -1,124 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from arrow.includes.common cimport *
-
-cdef extern from "arrow/api.h" namespace "arrow" nogil:
-
-    enum LogicalType" arrow::LogicalType::type":
-        LogicalType_NA" arrow::LogicalType::NA"
-
-        LogicalType_BOOL" arrow::LogicalType::BOOL"
-
-        LogicalType_UINT8" arrow::LogicalType::UINT8"
-        LogicalType_INT8" arrow::LogicalType::INT8"
-        LogicalType_UINT16" arrow::LogicalType::UINT16"
-        LogicalType_INT16" arrow::LogicalType::INT16"
-        LogicalType_UINT32" arrow::LogicalType::UINT32"
-        LogicalType_INT32" arrow::LogicalType::INT32"
-        LogicalType_UINT64" arrow::LogicalType::UINT64"
-        LogicalType_INT64" arrow::LogicalType::INT64"
-
-        LogicalType_FLOAT" arrow::LogicalType::FLOAT"
-        LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
-
-        LogicalType_STRING" arrow::LogicalType::STRING"
-
-        LogicalType_LIST" arrow::LogicalType::LIST"
-        LogicalType_STRUCT" arrow::LogicalType::STRUCT"
-
-    cdef cppclass CDataType" arrow::DataType":
-        LogicalType type
-        c_bool nullable
-
-        c_bool Equals(const CDataType* other)
-
-        c_string ToString()
-
-    cdef cppclass MemoryPool" arrow::MemoryPool":
-        int64_t bytes_allocated()
-
-    cdef cppclass CListType" arrow::ListType"(CDataType):
-        CListType(const shared_ptr[CDataType]& value_type,
-                  c_bool nullable)
-
-    cdef cppclass CStringType" arrow::StringType"(CDataType):
-        pass
-
-    cdef cppclass CField" arrow::Field":
-        c_string name
-        shared_ptr[CDataType] type
-
-        CField(const c_string& name, const shared_ptr[CDataType]& type)
-
-    cdef cppclass CStructType" arrow::StructType"(CDataType):
-        CStructType(const vector[shared_ptr[CField]]& fields,
-                    c_bool nullable)
-
-    cdef cppclass CSchema" arrow::Schema":
-        CSchema(const shared_ptr[CField]& fields)
-
-    cdef cppclass CArray" arrow::Array":
-        const shared_ptr[CDataType]& type()
-
-        int32_t length()
-        int32_t null_count()
-        LogicalType logical_type()
-
-        c_bool IsNull(int i)
-
-    cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
-        uint8_t Value(int i)
-
-    cdef cppclass CInt8Array" arrow::Int8Array"(CArray):
-        int8_t Value(int i)
-
-    cdef cppclass CUInt16Array" arrow::UInt16Array"(CArray):
-        uint16_t Value(int i)
-
-    cdef cppclass CInt16Array" arrow::Int16Array"(CArray):
-        int16_t Value(int i)
-
-    cdef cppclass CUInt32Array" arrow::UInt32Array"(CArray):
-        uint32_t Value(int i)
-
-    cdef cppclass CInt32Array" arrow::Int32Array"(CArray):
-        int32_t Value(int i)
-
-    cdef cppclass CUInt64Array" arrow::UInt64Array"(CArray):
-        uint64_t Value(int i)
-
-    cdef cppclass CInt64Array" arrow::Int64Array"(CArray):
-        int64_t Value(int i)
-
-    cdef cppclass CFloatArray" arrow::FloatArray"(CArray):
-        float Value(int i)
-
-    cdef cppclass CDoubleArray" arrow::DoubleArray"(CArray):
-        double Value(int i)
-
-    cdef cppclass CListArray" arrow::ListArray"(CArray):
-        const int32_t* offsets()
-        int32_t offset(int i)
-        int32_t value_length(int i)
-        const shared_ptr[CArray]& values()
-        const shared_ptr[CDataType]& value_type()
-
-    cdef cppclass CStringArray" arrow::StringArray"(CListArray):
-        c_string GetString(int i)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/common.pxd b/python/arrow/includes/common.pxd
deleted file mode 100644
index 839427a..0000000
--- a/python/arrow/includes/common.pxd
+++ /dev/null
@@ -1,36 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from libc.stdint cimport *
-from libcpp cimport bool as c_bool
-from libcpp.string cimport string as c_string
-from libcpp.vector cimport vector
-
-# This must be included for cerr and other things to work
-cdef extern from "<iostream>":
-    pass
-
-cdef extern from "<memory>" namespace "std" nogil:
-
-    cdef cppclass shared_ptr[T]:
-        shared_ptr()
-        shared_ptr(T*)
-        T* get()
-        void reset()
-        void reset(T* p)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/parquet.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/parquet.pxd b/python/arrow/includes/parquet.pxd
deleted file mode 100644
index 62342f3..0000000
--- a/python/arrow/includes/parquet.pxd
+++ /dev/null
@@ -1,51 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from arrow.includes.common cimport *
-
-cdef extern from "parquet/api/reader.h" namespace "parquet_cpp" nogil:
-    cdef cppclass ColumnReader:
-        pass
-
-    cdef cppclass BoolReader(ColumnReader):
-        pass
-
-    cdef cppclass Int32Reader(ColumnReader):
-        pass
-
-    cdef cppclass Int64Reader(ColumnReader):
-        pass
-
-    cdef cppclass Int96Reader(ColumnReader):
-        pass
-
-    cdef cppclass FloatReader(ColumnReader):
-        pass
-
-    cdef cppclass DoubleReader(ColumnReader):
-        pass
-
-    cdef cppclass ByteArrayReader(ColumnReader):
-        pass
-
-    cdef cppclass RowGroupReader:
-        pass
-
-    cdef cppclass ParquetFileReader:
-        pass

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/pyarrow.pxd b/python/arrow/includes/pyarrow.pxd
deleted file mode 100644
index 3eed5b8..0000000
--- a/python/arrow/includes/pyarrow.pxd
+++ /dev/null
@@ -1,45 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from arrow.includes.common cimport *
-from arrow.includes.arrow cimport (CArray, CDataType, LogicalType,
-                                   MemoryPool)
-
-cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
-    # We can later add more of the common status factory methods as needed
-    cdef Status Status_OK "Status::OK"()
-
-    cdef cppclass Status:
-        Status()
-
-        c_string ToString()
-
-        c_bool ok()
-        c_bool IsOutOfMemory()
-        c_bool IsKeyError()
-        c_bool IsTypeError()
-        c_bool IsIOError()
-        c_bool IsValueError()
-        c_bool IsNotImplemented()
-        c_bool IsArrowError()
-
-    shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
-    Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
-
-    MemoryPool* GetMemoryPool()

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/parquet.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/parquet.pyx b/python/arrow/parquet.pyx
deleted file mode 100644
index 23c3838..0000000
--- a/python/arrow/parquet.pyx
+++ /dev/null
@@ -1,23 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from arrow.compat import frombytes, tobytes
-from arrow.includes.parquet cimport *

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/scalar.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/scalar.pxd b/python/arrow/scalar.pxd
deleted file mode 100644
index 4e0a364..0000000
--- a/python/arrow/scalar.pxd
+++ /dev/null
@@ -1,66 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.common cimport *
-from arrow.includes.arrow cimport *
-
-from arrow.schema cimport DataType
-
-cdef class Scalar:
-    cdef readonly:
-        DataType type
-
-
-cdef class NAType(Scalar):
-    pass
-
-
-cdef class ArrayValue(Scalar):
-    cdef:
-        shared_ptr[CArray] sp_array
-        int index
-
-    cdef void init(self, DataType type,
-                   const shared_ptr[CArray]& sp_array, int index)
-
-    cdef void _set_array(self, const shared_ptr[CArray]& sp_array)
-
-
-cdef class Int8Value(ArrayValue):
-    pass
-
-
-cdef class Int64Value(ArrayValue):
-    pass
-
-
-cdef class ListValue(ArrayValue):
-    cdef readonly:
-        DataType value_type
-
-    cdef:
-        CListArray* ap
-
-    cdef getitem(self, int i)
-
-
-cdef class StringValue(ArrayValue):
-    pass
-
-cdef object box_arrow_scalar(DataType type,
-                             const shared_ptr[CArray]& sp_array,
-                             int index)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/scalar.pyx b/python/arrow/scalar.pyx
deleted file mode 100644
index 72a280e..0000000
--- a/python/arrow/scalar.pyx
+++ /dev/null
@@ -1,198 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.schema cimport DataType, box_data_type
-
-from arrow.compat import frombytes
-import arrow.schema as schema
-
-NA = None
-
-cdef class NAType(Scalar):
-
-    def __cinit__(self):
-        global NA
-        if NA is not None:
-            raise Exception('Cannot create multiple NAType instances')
-
-        self.type = schema.null()
-
-    def __repr__(self):
-        return 'NA'
-
-    def as_py(self):
-        return None
-
-NA = NAType()
-
-cdef class ArrayValue(Scalar):
-
-    cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
-                   int index):
-        self.type = type
-        self.index = index
-        self._set_array(sp_array)
-
-    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-        self.sp_array = sp_array
-
-    def __repr__(self):
-        if hasattr(self, 'as_py'):
-            return repr(self.as_py())
-        else:
-            return Scalar.__repr__(self)
-
-
-cdef class BooleanValue(ArrayValue):
-    pass
-
-
-cdef class Int8Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt8Array* ap = <CInt8Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt8Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt8Array* ap = <CUInt8Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int16Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt16Array* ap = <CInt16Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt16Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt16Array* ap = <CUInt16Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int32Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt32Array* ap = <CInt32Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt32Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt32Array* ap = <CUInt32Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class Int64Value(ArrayValue):
-
-    def as_py(self):
-        cdef CInt64Array* ap = <CInt64Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class UInt64Value(ArrayValue):
-
-    def as_py(self):
-        cdef CUInt64Array* ap = <CUInt64Array*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class FloatValue(ArrayValue):
-
-    def as_py(self):
-        cdef CFloatArray* ap = <CFloatArray*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class DoubleValue(ArrayValue):
-
-    def as_py(self):
-        cdef CDoubleArray* ap = <CDoubleArray*> self.sp_array.get()
-        return ap.Value(self.index)
-
-
-cdef class StringValue(ArrayValue):
-
-    def as_py(self):
-        cdef CStringArray* ap = <CStringArray*> self.sp_array.get()
-        return frombytes(ap.GetString(self.index))
-
-
-cdef class ListValue(ArrayValue):
-
-    def __len__(self):
-        return self.ap.value_length(self.index)
-
-    def __getitem__(self, i):
-        return self.getitem(i)
-
-    def __iter__(self):
-        for i in range(len(self)):
-            yield self.getitem(i)
-        raise StopIteration
-
-    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
-        self.sp_array = sp_array
-        self.ap = <CListArray*> sp_array.get()
-        self.value_type = box_data_type(self.ap.value_type())
-
-    cdef getitem(self, int i):
-        cdef int j = self.ap.offset(self.index) + i
-        return box_arrow_scalar(self.value_type, self.ap.values(), j)
-
-    def as_py(self):
-        cdef:
-            int j
-            list result = []
-
-        for j in range(len(self)):
-            result.append(self.getitem(j).as_py())
-
-        return result
-
-
-cdef dict _scalar_classes = {
-    LogicalType_UINT8: Int8Value,
-    LogicalType_UINT16: Int16Value,
-    LogicalType_UINT32: Int32Value,
-    LogicalType_UINT64: Int64Value,
-    LogicalType_INT8: Int8Value,
-    LogicalType_INT16: Int16Value,
-    LogicalType_INT32: Int32Value,
-    LogicalType_INT64: Int64Value,
-    LogicalType_FLOAT: FloatValue,
-    LogicalType_DOUBLE: DoubleValue,
-    LogicalType_LIST: ListValue,
-    LogicalType_STRING: StringValue
-}
-
-cdef object box_arrow_scalar(DataType type,
-                             const shared_ptr[CArray]& sp_array,
-                             int index):
-    cdef ArrayValue val
-    if sp_array.get().IsNull(index):
-        return NA
-    else:
-        val = _scalar_classes[type.type.type]()
-        val.init(type, sp_array, index)
-        return val

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/schema.pxd b/python/arrow/schema.pxd
deleted file mode 100644
index 8cc244a..0000000
--- a/python/arrow/schema.pxd
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.common cimport shared_ptr
-from arrow.includes.arrow cimport CDataType, CField, CSchema
-
-cdef class DataType:
-    cdef:
-        shared_ptr[CDataType] sp_type
-        CDataType* type
-
-    cdef init(self, const shared_ptr[CDataType]& type)
-
-cdef class Field:
-    cdef:
-        shared_ptr[CField] sp_field
-        CField* field
-
-    cdef readonly:
-        DataType type
-
-cdef class Schema:
-    cdef:
-        shared_ptr[CSchema] sp_schema
-        CSchema* schema
-
-cdef DataType box_data_type(const shared_ptr[CDataType]& type)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/schema.pyx b/python/arrow/schema.pyx
deleted file mode 100644
index 3001531..0000000
--- a/python/arrow/schema.pyx
+++ /dev/null
@@ -1,164 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-########################################
-# Data types, fields, schemas, and so forth
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from arrow.compat import frombytes, tobytes
-from arrow.includes.arrow cimport *
-cimport arrow.includes.pyarrow as pyarrow
-
-cimport cpython
-
-cdef class DataType:
-
-    def __cinit__(self):
-        pass
-
-    cdef init(self, const shared_ptr[CDataType]& type):
-        self.sp_type = type
-        self.type = type.get()
-
-    def __str__(self):
-        return frombytes(self.type.ToString())
-
-    def __repr__(self):
-        return 'DataType({0})'.format(str(self))
-
-    def __richcmp__(DataType self, DataType other, int op):
-        if op == cpython.Py_EQ:
-            return self.type.Equals(other.type)
-        elif op == cpython.Py_NE:
-            return not self.type.Equals(other.type)
-        else:
-            raise TypeError('Invalid comparison')
-
-
-cdef class Field:
-
-    def __cinit__(self, object name, DataType type):
-        self.type = type
-        self.sp_field.reset(new CField(tobytes(name), type.sp_type))
-        self.field = self.sp_field.get()
-
-    def __repr__(self):
-        return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
-
-    property name:
-
-        def __get__(self):
-            return frombytes(self.field.name)
-
-cdef dict _type_cache = {}
-
-cdef DataType primitive_type(LogicalType type, bint nullable=True):
-    if (type, nullable) in _type_cache:
-        return _type_cache[type, nullable]
-
-    cdef DataType out = DataType()
-    out.init(pyarrow.GetPrimitiveType(type, nullable))
-
-    _type_cache[type, nullable] = out
-    return out
-
-#------------------------------------------------------------
-# Type factory functions
-
-def field(name, type):
-    return Field(name, type)
-
-cdef set PRIMITIVE_TYPES = set([
-    LogicalType_NA, LogicalType_BOOL,
-    LogicalType_UINT8, LogicalType_INT8,
-    LogicalType_UINT16, LogicalType_INT16,
-    LogicalType_UINT32, LogicalType_INT32,
-    LogicalType_UINT64, LogicalType_INT64,
-    LogicalType_FLOAT, LogicalType_DOUBLE])
-
-def null():
-    return primitive_type(LogicalType_NA)
-
-def bool_(c_bool nullable=True):
-    return primitive_type(LogicalType_BOOL, nullable)
-
-def uint8(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT8, nullable)
-
-def int8(c_bool nullable=True):
-    return primitive_type(LogicalType_INT8, nullable)
-
-def uint16(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT16, nullable)
-
-def int16(c_bool nullable=True):
-    return primitive_type(LogicalType_INT16, nullable)
-
-def uint32(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT32, nullable)
-
-def int32(c_bool nullable=True):
-    return primitive_type(LogicalType_INT32, nullable)
-
-def uint64(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT64, nullable)
-
-def int64(c_bool nullable=True):
-    return primitive_type(LogicalType_INT64, nullable)
-
-def float_(c_bool nullable=True):
-    return primitive_type(LogicalType_FLOAT, nullable)
-
-def double(c_bool nullable=True):
-    return primitive_type(LogicalType_DOUBLE, nullable)
-
-def string(c_bool nullable=True):
-    """
-    UTF8 string
-    """
-    return primitive_type(LogicalType_STRING, nullable)
-
-def list_(DataType value_type, c_bool nullable=True):
-    cdef DataType out = DataType()
-    out.init(shared_ptr[CDataType](
-        new CListType(value_type.sp_type, nullable)))
-    return out
-
-def struct(fields, c_bool nullable=True):
-    """
-
-    """
-    cdef:
-        DataType out = DataType()
-        Field field
-        vector[shared_ptr[CField]] c_fields
-
-    for field in fields:
-        c_fields.push_back(field.sp_field)
-
-    out.init(shared_ptr[CDataType](
-        new CStructType(c_fields, nullable)))
-    return out
-
-
-cdef DataType box_data_type(const shared_ptr[CDataType]& type):
-    cdef DataType out = DataType()
-    out.init(type)
-    return out

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/__init__.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/__init__.py b/python/arrow/tests/__init__.py
deleted file mode 100644
index e69de29..0000000

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/test_array.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_array.py b/python/arrow/tests/test_array.py
deleted file mode 100644
index ebd872c..0000000
--- a/python/arrow/tests/test_array.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.compat import unittest
-import arrow
-import arrow.formatting as fmt
-
-
-class TestArrayAPI(unittest.TestCase):
-
-    def test_getitem_NA(self):
-        arr = arrow.from_pylist([1, None, 2])
-        assert arr[1] is arrow.NA
-
-    def test_list_format(self):
-        arr = arrow.from_pylist([[1], None, [2, 3]])
-        result = fmt.array_format(arr)
-        expected = """\
-[
-  [1],
-  NA,
-  [2,
-   3]
-]"""
-        assert result == expected
-
-    def test_string_format(self):
-        arr = arrow.from_pylist(['foo', None, 'bar'])
-        result = fmt.array_format(arr)
-        expected = """\
-[
-  'foo',
-  NA,
-  'bar'
-]"""
-        assert result == expected
-
-    def test_long_array_format(self):
-        arr = arrow.from_pylist(range(100))
-        result = fmt.array_format(arr, window=2)
-        expected = """\
-[
-  0,
-  1,
-  ...
-  98,
-  99
-]"""
-        assert result == expected

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_convert_builtin.py b/python/arrow/tests/test_convert_builtin.py
deleted file mode 100644
index 57e6ab9..0000000
--- a/python/arrow/tests/test_convert_builtin.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.compat import unittest
-import arrow
-
-
-class TestConvertList(unittest.TestCase):
-
-    def test_boolean(self):
-        pass
-
-    def test_empty_list(self):
-        arr = arrow.from_pylist([])
-        assert len(arr) == 0
-        assert arr.null_count == 0
-        assert arr.type == arrow.null()
-
-    def test_all_none(self):
-        arr = arrow.from_pylist([None, None])
-        assert len(arr) == 2
-        assert arr.null_count == 2
-        assert arr.type == arrow.null()
-
-    def test_integer(self):
-        arr = arrow.from_pylist([1, None, 3, None])
-        assert len(arr) == 4
-        assert arr.null_count == 2
-        assert arr.type == arrow.int64()
-
-    def test_garbage_collection(self):
-        import gc
-        bytes_before = arrow.total_allocated_bytes()
-        arrow.from_pylist([1, None, 3, None])
-        gc.collect()
-        assert arrow.total_allocated_bytes() == bytes_before
-
-    def test_double(self):
-        data = [1.5, 1, None, 2.5, None, None]
-        arr = arrow.from_pylist(data)
-        assert len(arr) == 6
-        assert arr.null_count == 3
-        assert arr.type == arrow.double()
-
-    def test_string(self):
-        data = ['foo', b'bar', None, 'arrow']
-        arr = arrow.from_pylist(data)
-        assert len(arr) == 4
-        assert arr.null_count == 1
-        assert arr.type == arrow.string()
-
-    def test_mixed_nesting_levels(self):
-        arrow.from_pylist([1, 2, None])
-        arrow.from_pylist([[1], [2], None])
-        arrow.from_pylist([[1], [2], [None]])
-
-        with self.assertRaises(arrow.ArrowException):
-            arrow.from_pylist([1, 2, [1]])
-
-        with self.assertRaises(arrow.ArrowException):
-            arrow.from_pylist([1, 2, []])
-
-        with self.assertRaises(arrow.ArrowException):
-            arrow.from_pylist([[1], [2], [None, [1]]])
-
-    def test_list_of_int(self):
-        data = [[1, 2, 3], [], None, [1, 2]]
-        arr = arrow.from_pylist(data)
-        assert len(arr) == 4
-        assert arr.null_count == 1
-        assert arr.type == arrow.list_(arrow.int64())

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_scalars.py b/python/arrow/tests/test_scalars.py
deleted file mode 100644
index 951380b..0000000
--- a/python/arrow/tests/test_scalars.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.compat import unittest, u
-import arrow
-
-
-class TestScalars(unittest.TestCase):
-
-    def test_null_singleton(self):
-        with self.assertRaises(Exception):
-            arrow.NAType()
-
-    def test_bool(self):
-        pass
-
-    def test_int64(self):
-        arr = arrow.from_pylist([1, 2, None])
-
-        v = arr[0]
-        assert isinstance(v, arrow.Int64Value)
-        assert repr(v) == "1"
-        assert v.as_py() == 1
-
-        assert arr[2] is arrow.NA
-
-    def test_double(self):
-        arr = arrow.from_pylist([1.5, None, 3])
-
-        v = arr[0]
-        assert isinstance(v, arrow.DoubleValue)
-        assert repr(v) == "1.5"
-        assert v.as_py() == 1.5
-
-        assert arr[1] is arrow.NA
-
-        v = arr[2]
-        assert v.as_py() == 3.0
-
-    def test_string(self):
-        arr = arrow.from_pylist(['foo', None, u('bar')])
-
-        v = arr[0]
-        assert isinstance(v, arrow.StringValue)
-        assert repr(v) == "'foo'"
-        assert v.as_py() == 'foo'
-
-        assert arr[1] is arrow.NA
-
-        v = arr[2].as_py()
-        assert v == 'bar'
-        assert isinstance(v, str)
-
-    def test_list(self):
-        arr = arrow.from_pylist([['foo', None], None, ['bar'], []])
-
-        v = arr[0]
-        assert len(v) == 2
-        assert isinstance(v, arrow.ListValue)
-        assert repr(v) == "['foo', None]"
-        assert v.as_py() == ['foo', None]
-        assert v[0].as_py() == 'foo'
-        assert v[1] is arrow.NA
-
-        assert arr[1] is arrow.NA
-
-        v = arr[3]
-        assert len(v) == 0

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_schema.py b/python/arrow/tests/test_schema.py
deleted file mode 100644
index a89edd7..0000000
--- a/python/arrow/tests/test_schema.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.compat import unittest
-import arrow
-
-
-class TestTypes(unittest.TestCase):
-
-    def test_integers(self):
-        dtypes = ['int8', 'int16', 'int32', 'int64',
-                  'uint8', 'uint16', 'uint32', 'uint64']
-
-        for name in dtypes:
-            factory = getattr(arrow, name)
-            t = factory()
-            t_required = factory(False)
-
-            assert str(t) == name
-            assert str(t_required) == '{0} not null'.format(name)
-
-    def test_list(self):
-        value_type = arrow.int32()
-        list_type = arrow.list_(value_type)
-        assert str(list_type) == 'list<int32>'
-
-    def test_string(self):
-        t = arrow.string()
-        assert str(t) == 'string'
-
-    def test_field(self):
-        t = arrow.string()
-        f = arrow.field('foo', t)
-
-        assert f.name == 'foo'
-        assert f.type is t
-        assert repr(f) == "Field('foo', type=string)"

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/doc/INSTALL.md
----------------------------------------------------------------------
diff --git a/python/doc/INSTALL.md b/python/doc/INSTALL.md
new file mode 100644
index 0000000..d30a030
--- /dev/null
+++ b/python/doc/INSTALL.md
@@ -0,0 +1,87 @@
+## Building pyarrow (Apache Arrow Python library)
+
+First, clone the master git repository:
+
+```bash
+git clone https://github.com/apache/arrow.git arrow
+```
+
+#### System requirements
+
+Building pyarrow requires:
+
+* A C++11 compiler
+
+  * Linux: gcc >= 4.8 or clang >= 3.5
+  * OS X: XCode 6.4 or higher preferred
+
+* [cmake][1]
+
+#### Python requirements
+
+You will need Python (CPython) 2.7, 3.4, or 3.5 installed. Earlier releases
+are not being targeted.
+
+> This library targets CPython only due to an emphasis on interoperability with
+> pandas and NumPy, which are only available for CPython.
+
+The build requires NumPy, Cython, and a few other Python dependencies:
+
+```bash
+pip install cython
+cd arrow/python
+pip install -r requirements.txt
+```
+
+#### Installing Arrow C++ library
+
+First, you should choose an installation location for Arrow C++. In the future
+using the default system install location will work, but for now we are being
+explicit:
+
+```bash
+export ARROW_HOME=$HOME/local
+```
+
+Now, we build Arrow:
+
+```bash
+cd arrow/cpp
+
+mkdir dev-build
+cd dev-build
+
+cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME ..
+
+make
+
+# Use sudo here if $ARROW_HOME requires it
+make install
+```
+
+#### Install `pyarrow`
+
+```bash
+cd arrow/python
+
+python setup.py install
+```
+
+> On XCode 6 and prior there are some known OS X `@rpath` issues. If you are
+> unable to import pyarrow, upgrading XCode may be the solution.
+
+
+```python
+In [1]: import pyarrow
+
+In [2]: pyarrow.from_pylist([1,2,3])
+Out[2]:
+<pyarrow.array.Int64Array object at 0x7f899f3e60e8>
+[
+  1,
+  2,
+  3
+]
+```
+
+[1]: https://cmake.org/
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
new file mode 100644
index 0000000..8d93a15
--- /dev/null
+++ b/python/pyarrow/__init__.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# flake8: noqa
+
+from pyarrow.array import (Array, from_pylist, total_allocated_bytes,
+                           BooleanArray, NumericArray,
+                           Int8Array, UInt8Array,
+                           ListArray, StringArray)
+
+from pyarrow.error import ArrowException
+
+from pyarrow.scalar import (ArrayValue, Scalar, NA, NAType,
+                            BooleanValue,
+                            Int8Value, Int16Value, Int32Value, Int64Value,
+                            UInt8Value, UInt16Value, UInt32Value, UInt64Value,
+                            FloatValue, DoubleValue, ListValue, StringValue)
+
+from pyarrow.schema import (null, bool_,
+                            int8, int16, int32, int64,
+                            uint8, uint16, uint32, uint64,
+                            float_, double, string,
+                            list_, struct, field,
+                            DataType, Field, Schema)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
new file mode 100644
index 0000000..d0d3486
--- /dev/null
+++ b/python/pyarrow/array.pxd
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport shared_ptr
+from pyarrow.includes.libarrow cimport CArray, LogicalType
+
+from pyarrow.scalar import NA
+
+from pyarrow.schema cimport DataType
+
+cdef extern from "Python.h":
+    int PySlice_Check(object)
+
+cdef class Array:
+    cdef:
+        shared_ptr[CArray] sp_array
+        CArray* ap
+
+    cdef readonly:
+        DataType type
+
+    cdef init(self, const shared_ptr[CArray]& sp_array)
+    cdef getitem(self, int i)
+
+
+cdef class BooleanArray(Array):
+    pass
+
+
+cdef class NumericArray(Array):
+    pass
+
+
+cdef class Int8Array(NumericArray):
+    pass
+
+
+cdef class UInt8Array(NumericArray):
+    pass
+
+
+cdef class Int16Array(NumericArray):
+    pass
+
+
+cdef class UInt16Array(NumericArray):
+    pass
+
+
+cdef class Int32Array(NumericArray):
+    pass
+
+
+cdef class UInt32Array(NumericArray):
+    pass
+
+
+cdef class Int64Array(NumericArray):
+    pass
+
+
+cdef class UInt64Array(NumericArray):
+    pass
+
+
+cdef class ListArray(Array):
+    pass
+
+
+cdef class StringArray(Array):
+    pass

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
new file mode 100644
index 0000000..bceb333
--- /dev/null
+++ b/python/pyarrow/array.pyx
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from pyarrow.includes.libarrow cimport *
+cimport pyarrow.includes.pyarrow as pyarrow
+
+from pyarrow.compat import frombytes, tobytes
+from pyarrow.error cimport check_status
+
+cimport pyarrow.scalar as scalar
+from pyarrow.scalar import NA
+
+def total_allocated_bytes():
+    """Return bytes_allocated() of the pool given by GetMemoryPool()."""
+    return pyarrow.GetMemoryPool().bytes_allocated()
+
+
+cdef class Array:
+    """Wraps a shared_ptr[CArray] and exposes it as a Python sequence."""
+
+    cdef init(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+        self.ap = sp_array.get()
+        self.type = DataType()
+        self.type.init(self.sp_array.get().type())
+
+    property null_count:
+
+        def __get__(self):
+            return self.sp_array.get().null_count()
+
+    def __iter__(self):
+        # Fall off the end; an explicit StopIteration breaks under PEP 479
+        for i in range(len(self)):
+            yield self.getitem(i)
+
+    def __repr__(self):
+        from pyarrow.formatting import array_format
+        type_format = object.__repr__(self)
+        values = array_format(self, window=10)
+        return '{0}\n{1}'.format(type_format, values)
+
+    def __len__(self):
+        return self.sp_array.get().length()
+
+    def isnull(self):
+        raise NotImplementedError
+
+    def __getitem__(self, key):
+        """Return the element at an integer key, or a step-1 slice."""
+        cdef:
+            Py_ssize_t n = len(self)
+
+        if PySlice_Check(key):
+            # slice.indices() normalizes negative/missing bounds against n;
+            # the former ``while bound < 0`` loops never ended when n == 0.
+            start, stop, step = key.indices(n)
+            if step != 1:
+                raise NotImplementedError
+            return self.slice(start, stop)
+
+        # Wrap a negative index once, then bounds-check: the former
+        # ``while key < 0`` loop never ended for an empty array, and
+        # too-large keys were passed on unchecked.
+        if key < 0:
+            key += n
+        if key < 0 or key >= n:
+            raise IndexError('array index out of range')
+
+        return self.getitem(key)
+
+    cdef getitem(self, int i):
+        return scalar.box_arrow_scalar(self.type, self.sp_array, i)
+
+    def slice(self, start, end):
+        pass
+
+
+cdef class NullArray(Array):
+    pass
+
+
+cdef class BooleanArray(Array):
+    pass
+
+
+cdef class NumericArray(Array):
+    pass
+
+
+cdef class Int8Array(NumericArray):
+    pass
+
+
+cdef class UInt8Array(NumericArray):
+    pass
+
+
+cdef class Int16Array(NumericArray):
+    pass
+
+
+cdef class UInt16Array(NumericArray):
+    pass
+
+
+cdef class Int32Array(NumericArray):
+    pass
+
+
+cdef class UInt32Array(NumericArray):
+    pass
+
+
+cdef class Int64Array(NumericArray):
+    pass
+
+
+cdef class UInt64Array(NumericArray):
+    pass
+
+
+cdef class FloatArray(NumericArray):
+    pass
+
+
+cdef class DoubleArray(NumericArray):
+    pass
+
+
+cdef class ListArray(Array):
+    pass
+
+
+cdef class StringArray(Array):
+    pass
+
+
+cdef dict _array_classes = {
+    LogicalType_NA: NullArray,
+    LogicalType_BOOL: BooleanArray,
+    LogicalType_INT64: Int64Array,
+    LogicalType_DOUBLE: DoubleArray,
+    LogicalType_LIST: ListArray,
+    LogicalType_STRING: StringArray,
+}
+
+cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
+    if sp_array.get() == NULL:
+        raise ValueError('Array was NULL')
+
+    cdef CDataType* data_type = sp_array.get().type().get()
+
+    if data_type == NULL:
+        raise ValueError('Array data type was NULL')
+
+    cdef Array arr = _array_classes[data_type.type]()
+    arr.init(sp_array)
+    return arr
+
+
+def from_pylist(object list_obj, DataType type=None):
+    """
+    Convert a Python list to an Arrow array via ConvertPySequence.
+
+    Supplying an explicit ``type`` is not implemented and raises
+    NotImplementedError.
+    """
+    cdef shared_ptr[CArray] sp_array
+
+    if type is not None:
+        raise NotImplementedError
+    check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
+    return box_arrow_array(sp_array)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/compat.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
new file mode 100644
index 0000000..08f0f23
--- /dev/null
+++ b/python/pyarrow/compat.py
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# flake8: noqa
+
+import itertools
+
+import numpy as np
+
+import sys
+import six
+from six import BytesIO, StringIO, string_types as py_string
+
+
+PY26 = sys.version_info[:2] == (2, 6)
+PY2 = sys.version_info[0] == 2
+
+
+if PY26:
+    import unittest2 as unittest
+else:
+    import unittest
+
+
+if PY2:
+    import cPickle
+
+    try:
+        from cdecimal import Decimal
+    except ImportError:
+        from decimal import Decimal
+
+    unicode_type = unicode
+    lzip = zip
+    zip = itertools.izip
+
+    def dict_values(x):
+        return x.values()
+
+    range = xrange
+    long = long
+
+    def u(s):
+        return unicode(s, "unicode_escape")
+
+    def tobytes(o):
+        if isinstance(o, unicode):
+            return o.encode('utf8')
+        else:
+            return o
+
+    def frombytes(o):
+        return o
+else:
+    unicode_type = str
+    def lzip(*x):
+        return list(zip(*x))
+    long = int
+    zip = zip
+    def dict_values(x):
+        return list(x.values())
+    from decimal import Decimal
+    range = range
+
+    def u(s):
+        return s
+
+    def tobytes(o):
+        if isinstance(o, str):
+            return o.encode('utf8')
+        else:
+            return o
+
+    def frombytes(o):
+        return o.decode('utf8')
+
+
+integer_types = six.integer_types + (np.integer,)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/config.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/config.pyx b/python/pyarrow/config.pyx
new file mode 100644
index 0000000..521bc06
--- /dev/null
+++ b/python/pyarrow/config.pyx
@@ -0,0 +1,8 @@
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+cdef extern from 'pyarrow/init.h' namespace 'pyarrow':
+    void pyarrow_init()
+
+pyarrow_init()

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/error.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pxd b/python/pyarrow/error.pxd
new file mode 100644
index 0000000..d226abe
--- /dev/null
+++ b/python/pyarrow/error.pxd
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.pyarrow cimport *
+
+cdef check_status(const Status& status)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx
new file mode 100644
index 0000000..3f8d7dd
--- /dev/null
+++ b/python/pyarrow/error.pyx
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport c_string
+from pyarrow.compat import frombytes
+
+class ArrowException(Exception):
+    pass
+
+cdef check_status(const Status& status):
+    if status.ok():
+        return
+
+    cdef c_string c_message = status.ToString()
+    raise ArrowException(frombytes(c_message))

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/formatting.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/formatting.py b/python/pyarrow/formatting.py
new file mode 100644
index 0000000..5fe0611
--- /dev/null
+++ b/python/pyarrow/formatting.py
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Pretty-printing and other formatting utilities for Arrow data structures
+
+import pyarrow.scalar as scalar
+
+
+def array_format(arr, window=None):
+    """
+    Render arr as '[' ... ']' with one element per line, indented by 2.
+
+    If window is given and 2 * window < len(arr), only the first and last
+    `window` elements are shown, separated by a '...' line.
+    """
+    n = len(arr)
+
+    if window is None or window * 2 >= n:
+        body = ',\n'.join([value_format(x, 0) for x in arr])
+    else:
+        head = [value_format(arr[i], 0) + ',' for i in range(window)]
+        tail = [value_format(arr[i], 0) for i in range(n - window, n)]
+        # every tail entry except the last keeps a trailing comma
+        tail[:-1] = [item + ',' for item in tail[:-1]]
+        body = '\n'.join(head + ['...'] + tail)
+
+    return '[\n{0}\n]'.format(_indent(body, 2))
+
+
+def value_format(x, indent_level=0):
+    """Return repr(x), or a bracketed per-item rendering for ListValue."""
+    if not isinstance(x, scalar.ListValue):
+        return repr(x)
+    inner = _indent(',\n'.join(value_format(item) for item in x), 1)
+    return '[{0}]'.format(inner.strip())
+
+
+def _indent(text, spaces):
+    """Prefix every line of text with `spaces` blanks (no-op for 0)."""
+    if spaces == 0:
+        return text
+    return '\n'.join(' ' * spaces + ln for ln in text.split('\n'))

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/__init__.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/__init__.pxd b/python/pyarrow/includes/__init__.pxd
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
new file mode 100644
index 0000000..839427a
--- /dev/null
+++ b/python/pyarrow/includes/common.pxd
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from libc.stdint cimport *
+from libcpp cimport bool as c_bool
+from libcpp.string cimport string as c_string
+from libcpp.vector cimport vector
+
+# This must be included for cerr and other things to work
+cdef extern from "<iostream>":
+    pass
+
+cdef extern from "<memory>" namespace "std" nogil:
+
+    cdef cppclass shared_ptr[T]:
+        shared_ptr()
+        shared_ptr(T*)
+        T* get()
+        void reset()
+        void reset(T* p)