You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/10 00:45:11 UTC
[1/2] arrow git commit: ARROW-54: [Python] Rename package to "pyarrow"
Repository: arrow
Updated Branches:
refs/heads/master 83675273b -> 6fdcd4943
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
new file mode 100644
index 0000000..baba112
--- /dev/null
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+
+cdef extern from "arrow/api.h" namespace "arrow" nogil:
+    # Cython-side declarations of the C++ API exposed by arrow/api.h. The
+    # quoted string after each name is the C++ identifier it is bound to.
+
+    # Type ids; each member is bound to its fully qualified C++ enumerator.
+    enum LogicalType" arrow::LogicalType::type":
+        LogicalType_NA" arrow::LogicalType::NA"
+
+        LogicalType_BOOL" arrow::LogicalType::BOOL"
+
+        LogicalType_UINT8" arrow::LogicalType::UINT8"
+        LogicalType_INT8" arrow::LogicalType::INT8"
+        LogicalType_UINT16" arrow::LogicalType::UINT16"
+        LogicalType_INT16" arrow::LogicalType::INT16"
+        LogicalType_UINT32" arrow::LogicalType::UINT32"
+        LogicalType_INT32" arrow::LogicalType::INT32"
+        LogicalType_UINT64" arrow::LogicalType::UINT64"
+        LogicalType_INT64" arrow::LogicalType::INT64"
+
+        LogicalType_FLOAT" arrow::LogicalType::FLOAT"
+        LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
+
+        LogicalType_STRING" arrow::LogicalType::STRING"
+
+        LogicalType_LIST" arrow::LogicalType::LIST"
+        LogicalType_STRUCT" arrow::LogicalType::STRUCT"
+
+    # Base class of all data types: carries the type id and nullability.
+    cdef cppclass CDataType" arrow::DataType":
+        LogicalType type
+        c_bool nullable
+
+        c_bool Equals(const CDataType* other)
+
+        c_string ToString()
+
+    cdef cppclass MemoryPool" arrow::MemoryPool":
+        int64_t bytes_allocated()
+
+    cdef cppclass CListType" arrow::ListType"(CDataType):
+        CListType(const shared_ptr[CDataType]& value_type,
+                  c_bool nullable)
+
+    cdef cppclass CStringType" arrow::StringType"(CDataType):
+        pass
+
+    # A named, typed field.
+    cdef cppclass CField" arrow::Field":
+        c_string name
+        shared_ptr[CDataType] type
+
+        CField(const c_string& name, const shared_ptr[CDataType]& type)
+
+    cdef cppclass CStructType" arrow::StructType"(CDataType):
+        CStructType(const vector[shared_ptr[CField]]& fields,
+                    c_bool nullable)
+
+    # A schema holds several fields, so the constructor is declared with the
+    # same vector-of-fields argument as CStructType rather than a single
+    # field pointer.
+    # NOTE(review): confirm against the arrow::Schema constructor in the C++
+    # headers; the declaration is not exercised by the code in this commit.
+    cdef cppclass CSchema" arrow::Schema":
+        CSchema(const vector[shared_ptr[CField]]& fields)
+
+    # Base class of all arrays.
+    cdef cppclass CArray" arrow::Array":
+        const shared_ptr[CDataType]& type()
+
+        int32_t length()
+        int32_t null_count()
+        LogicalType logical_type()
+
+        c_bool IsNull(int i)
+
+    # Fixed-width arrays: Value(i) returns the element at position i as the
+    # matching C type.
+    cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
+        uint8_t Value(int i)
+
+    cdef cppclass CInt8Array" arrow::Int8Array"(CArray):
+        int8_t Value(int i)
+
+    cdef cppclass CUInt16Array" arrow::UInt16Array"(CArray):
+        uint16_t Value(int i)
+
+    cdef cppclass CInt16Array" arrow::Int16Array"(CArray):
+        int16_t Value(int i)
+
+    cdef cppclass CUInt32Array" arrow::UInt32Array"(CArray):
+        uint32_t Value(int i)
+
+    cdef cppclass CInt32Array" arrow::Int32Array"(CArray):
+        int32_t Value(int i)
+
+    cdef cppclass CUInt64Array" arrow::UInt64Array"(CArray):
+        uint64_t Value(int i)
+
+    cdef cppclass CInt64Array" arrow::Int64Array"(CArray):
+        int64_t Value(int i)
+
+    cdef cppclass CFloatArray" arrow::FloatArray"(CArray):
+        float Value(int i)
+
+    cdef cppclass CDoubleArray" arrow::DoubleArray"(CArray):
+        double Value(int i)
+
+    # List array: per-slot offsets into a child `values` array.
+    cdef cppclass CListArray" arrow::ListArray"(CArray):
+        const int32_t* offsets()
+        int32_t offset(int i)
+        int32_t value_length(int i)
+        const shared_ptr[CArray]& values()
+        const shared_ptr[CDataType]& value_type()
+
+    cdef cppclass CStringArray" arrow::StringArray"(CListArray):
+        c_string GetString(int i)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/parquet.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/parquet.pxd b/python/pyarrow/includes/parquet.pxd
new file mode 100644
index 0000000..99a2d42
--- /dev/null
+++ b/python/pyarrow/includes/parquet.pxd
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+
+# Opaque declarations of the reader classes from parquet/api/reader.h (C++
+# namespace parquet_cpp). No members are declared yet, so these types can be
+# named from Cython but nothing can be called on them.
+cdef extern from "parquet/api/reader.h" namespace "parquet_cpp" nogil:
+ # Base class of the typed column readers declared below.
+ cdef cppclass ColumnReader:
+ pass
+
+ cdef cppclass BoolReader(ColumnReader):
+ pass
+
+ cdef cppclass Int32Reader(ColumnReader):
+ pass
+
+ cdef cppclass Int64Reader(ColumnReader):
+ pass
+
+ cdef cppclass Int96Reader(ColumnReader):
+ pass
+
+ cdef cppclass FloatReader(ColumnReader):
+ pass
+
+ cdef cppclass DoubleReader(ColumnReader):
+ pass
+
+ cdef cppclass ByteArrayReader(ColumnReader):
+ pass
+
+ # Declared without a base class, unlike the column readers above.
+ cdef cppclass RowGroupReader:
+ pass
+
+ cdef cppclass ParquetFileReader:
+ pass
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
new file mode 100644
index 0000000..9a0c004
--- /dev/null
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport (CArray, CDataType, LogicalType,
+ MemoryPool)
+
+# Declarations of the C++ helper library behind pyarrow/api.h (C++ namespace
+# pyarrow). The whole block is declared nogil.
+cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
+ # We can later add more of the common status factory methods as needed
+ # Status_OK is the Cython name bound to the C++ expression Status::OK.
+ cdef Status Status_OK "Status::OK"()
+
+ # Result object returned by the C++ helpers; the Is*() predicates report
+ # which error category, if any, the status carries.
+ cdef cppclass Status:
+ Status()
+
+ c_string ToString()
+
+ c_bool ok()
+ c_bool IsOutOfMemory()
+ c_bool IsKeyError()
+ c_bool IsTypeError()
+ c_bool IsIOError()
+ c_bool IsValueError()
+ c_bool IsNotImplemented()
+ c_bool IsArrowError()
+
+ shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
+ # NOTE(review): this takes a Python object but inherits nogil from the
+ # extern block -- confirm callers always hold the GIL when calling it.
+ Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
+
+ MemoryPool* GetMemoryPool()
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/parquet.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx
new file mode 100644
index 0000000..622e7d0
--- /dev/null
+++ b/python/pyarrow/parquet.pyx
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from pyarrow.compat import frombytes, tobytes
+from pyarrow.includes.parquet cimport *
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/scalar.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pxd b/python/pyarrow/scalar.pxd
new file mode 100644
index 0000000..b068457
--- /dev/null
+++ b/python/pyarrow/scalar.pxd
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+
+from pyarrow.schema cimport DataType
+
+# C-level declarations for the scalar classes implemented in scalar.pyx.
+
+# Base class of all scalar values; `type` is readable from Python.
+cdef class Scalar:
+ cdef readonly:
+ DataType type
+
+
+cdef class NAType(Scalar):
+ pass
+
+
+# A scalar that refers to position `index` of the array held by `sp_array`.
+cdef class ArrayValue(Scalar):
+ cdef:
+ shared_ptr[CArray] sp_array
+ int index
+
+ cdef void init(self, DataType type,
+ const shared_ptr[CArray]& sp_array, int index)
+
+ cdef void _set_array(self, const shared_ptr[CArray]& sp_array)
+
+
+cdef class Int8Value(ArrayValue):
+ pass
+
+
+cdef class Int64Value(ArrayValue):
+ pass
+
+
+# List scalar: `ap` is the array viewed as a CListArray (set in _set_array in
+# scalar.pyx) and `value_type` is the boxed type of the list elements.
+cdef class ListValue(ArrayValue):
+ cdef readonly:
+ DataType value_type
+
+ cdef:
+ CListArray* ap
+
+ cdef getitem(self, int i)
+
+
+cdef class StringValue(ArrayValue):
+ pass
+
+# Boxes position `index` of `sp_array` as a scalar object.
+cdef object box_arrow_scalar(DataType type,
+ const shared_ptr[CArray]& sp_array,
+ int index)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
new file mode 100644
index 0000000..261a389
--- /dev/null
+++ b/python/pyarrow/scalar.pyx
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.schema cimport DataType, box_data_type
+
+from pyarrow.compat import frombytes
+import pyarrow.schema as schema
+
+# Bound to the single NAType instance right after the class definition.
+NA = None
+
+cdef class NAType(Scalar):
+    """Type of the ``NA`` singleton that stands for a null value."""
+
+    def __cinit__(self):
+        global NA
+        # Only the first construction succeeds: once the module-level NA is
+        # bound, any further instantiation is rejected.
+        if NA is None:
+            self.type = schema.null()
+        else:
+            raise Exception('Cannot create multiple NAType instances')
+
+    def as_py(self):
+        """Return the Python equivalent of NA, which is None."""
+        return None
+
+    def __repr__(self):
+        return 'NA'
+
+NA = NAType()
+
+cdef class ArrayValue(Scalar):
+    """Scalar that refers to one position of an underlying array."""
+
+    cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
+                   int index):
+        # Record the type and position, then hand the array to _set_array
+        # so subclasses can derive extra state from it.
+        self.type = type
+        self.index = index
+        self._set_array(sp_array)
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+
+    def __repr__(self):
+        # Subclasses without an as_py() method use the base-class repr.
+        if not hasattr(self, 'as_py'):
+            return Scalar.__repr__(self)
+        return repr(self.as_py())
+
+
+# Placeholder scalar class for boolean values: it defines no as_py(), so
+# ArrayValue.__repr__ falls back to Scalar.__repr__ for its instances.
+cdef class BooleanValue(ArrayValue):
+ pass
+
+
+cdef class Int8Value(ArrayValue):
+    """Scalar read from its array through a CInt8Array pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python int."""
+        return (<CInt8Array*> self.sp_array.get()).Value(self.index)
+
+
+cdef class UInt8Value(ArrayValue):
+    """Scalar read from its array through a CUInt8Array pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python int."""
+        return (<CUInt8Array*> self.sp_array.get()).Value(self.index)
+
+
+cdef class Int16Value(ArrayValue):
+    """Scalar read from its array through a CInt16Array pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python int."""
+        return (<CInt16Array*> self.sp_array.get()).Value(self.index)
+
+
+cdef class UInt16Value(ArrayValue):
+    """Scalar read from its array through a CUInt16Array pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python int."""
+        return (<CUInt16Array*> self.sp_array.get()).Value(self.index)
+
+
+cdef class Int32Value(ArrayValue):
+    """Scalar read from its array through a CInt32Array pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python int."""
+        return (<CInt32Array*> self.sp_array.get()).Value(self.index)
+
+
+cdef class UInt32Value(ArrayValue):
+    """Scalar read from its array through a CUInt32Array pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python int."""
+        return (<CUInt32Array*> self.sp_array.get()).Value(self.index)
+
+
+cdef class Int64Value(ArrayValue):
+    """Scalar read from its array through a CInt64Array pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python int."""
+        return (<CInt64Array*> self.sp_array.get()).Value(self.index)
+
+
+cdef class UInt64Value(ArrayValue):
+    """Scalar read from its array through a CUInt64Array pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python int."""
+        return (<CUInt64Array*> self.sp_array.get()).Value(self.index)
+
+
+cdef class FloatValue(ArrayValue):
+    """Scalar read from its array through a CFloatArray pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python float."""
+        return (<CFloatArray*> self.sp_array.get()).Value(self.index)
+
+
+cdef class DoubleValue(ArrayValue):
+    """Scalar read from its array through a CDoubleArray pointer."""
+
+    def as_py(self):
+        """Return the value at ``self.index`` as a Python float."""
+        return (<CDoubleArray*> self.sp_array.get()).Value(self.index)
+
+
+cdef class StringValue(ArrayValue):
+    """Scalar read from its array through a CStringArray pointer."""
+
+    def as_py(self):
+        """Return the string at ``self.index``, converted with frombytes."""
+        raw = (<CStringArray*> self.sp_array.get()).GetString(self.index)
+        return frombytes(raw)
+
+
+cdef class ListValue(ArrayValue):
+    """Scalar wrapping one list slot of a list array.
+
+    ``self.ap`` is the wrapped array viewed as a CListArray and
+    ``self.index`` selects the list within it. Elements are boxed on access
+    with ``box_arrow_scalar``.
+    """
+
+    def __len__(self):
+        return self.ap.value_length(self.index)
+
+    def __getitem__(self, i):
+        """Return the boxed element at position ``i`` of this list.
+
+        Negative positions count from the end. Raises IndexError when the
+        position lies outside the list.
+        """
+        cdef:
+            int idx = i
+            int n = self.ap.value_length(self.index)
+
+        # getitem() only adds the position to this list's start offset, so
+        # an unchecked position would read a neighbouring list or run past
+        # the child array.
+        if idx < 0:
+            idx += n
+        if idx < 0 or idx >= n:
+            raise IndexError('list index out of range')
+        return self.getitem(idx)
+
+    def __iter__(self):
+        # The generator ends by falling off the end; an explicit
+        # ``raise StopIteration`` here becomes RuntimeError under PEP 479.
+        for i in range(len(self)):
+            yield self.getitem(i)
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+        # Keep a typed view of the array and box the element type once.
+        self.ap = <CListArray*> sp_array.get()
+        self.value_type = box_data_type(self.ap.value_type())
+
+    cdef getitem(self, int i):
+        # Position within the child values array: the start offset of this
+        # list plus the position inside it. No bounds check is done here.
+        cdef int j = self.ap.offset(self.index) + i
+        return box_arrow_scalar(self.value_type, self.ap.values(), j)
+
+    def as_py(self):
+        """Return the list as a Python list of each element's ``as_py()``."""
+        return [self.getitem(j).as_py() for j in range(len(self))]
+
+
+# Maps a LogicalType id to the scalar class used to box a value of that type.
+# Each unsigned type gets its own UInt*Value class: the signed classes read
+# the slot through a signed array pointer and would misreport large values.
+cdef dict _scalar_classes = {
+    LogicalType_UINT8: UInt8Value,
+    LogicalType_UINT16: UInt16Value,
+    LogicalType_UINT32: UInt32Value,
+    LogicalType_UINT64: UInt64Value,
+    LogicalType_INT8: Int8Value,
+    LogicalType_INT16: Int16Value,
+    LogicalType_INT32: Int32Value,
+    LogicalType_INT64: Int64Value,
+    LogicalType_FLOAT: FloatValue,
+    LogicalType_DOUBLE: DoubleValue,
+    LogicalType_LIST: ListValue,
+    LogicalType_STRING: StringValue
+}
+
+cdef object box_arrow_scalar(DataType type,
+                             const shared_ptr[CArray]& sp_array,
+                             int index):
+    """Box position ``index`` of ``sp_array`` as a scalar object.
+
+    Returns the NA singleton for a null slot; otherwise an instance of the
+    class registered in ``_scalar_classes`` for the type id of ``type``.
+    """
+    cdef ArrayValue boxed
+
+    if sp_array.get().IsNull(index):
+        return NA
+
+    value_class = _scalar_classes[type.type.type]
+    boxed = value_class()
+    boxed.init(type, sp_array, index)
+    return boxed
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd
new file mode 100644
index 0000000..07b9bd0
--- /dev/null
+++ b/python/pyarrow/schema.pxd
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport shared_ptr
+from pyarrow.includes.libarrow cimport CDataType, CField, CSchema
+
+# C-level declarations for the classes implemented in schema.pyx.
+
+# Wrapper around a C++ data type: `sp_type` holds the shared pointer and
+# `type` is the raw pointer taken from it; init() in schema.pyx sets both.
+cdef class DataType:
+ cdef:
+ shared_ptr[CDataType] sp_type
+ CDataType* type
+
+ cdef init(self, const shared_ptr[CDataType]& type)
+
+# Wrapper around a C++ field; `type` is the Python-level DataType it was
+# created with and is readable from Python.
+cdef class Field:
+ cdef:
+ shared_ptr[CField] sp_field
+ CField* field
+
+ cdef readonly:
+ DataType type
+
+# NOTE(review): no implementation of Schema is visible in schema.pyx in this
+# commit -- confirm whether this declaration is used yet.
+cdef class Schema:
+ cdef:
+ shared_ptr[CSchema] sp_schema
+ CSchema* schema
+
+# Wraps an existing C++ data type in a new DataType object.
+cdef DataType box_data_type(const shared_ptr[CDataType]& type)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
new file mode 100644
index 0000000..ea87872
--- /dev/null
+++ b/python/pyarrow/schema.pyx
@@ -0,0 +1,164 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+########################################
+# Data types, fields, schemas, and so forth
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from pyarrow.compat import frombytes, tobytes
+from pyarrow.includes.libarrow cimport *
+cimport pyarrow.includes.pyarrow as pyarrow
+
+cimport cpython
+
+cdef class DataType:
+    """Python wrapper around a C++ data type.
+
+    Instances are created empty and filled in with ``init``; ``self.type``
+    is only valid after that call.
+    """
+
+    def __cinit__(self):
+        pass
+
+    cdef init(self, const shared_ptr[CDataType]& type):
+        # Keep the shared pointer for ownership and cache the raw pointer
+        # for direct member access.
+        self.sp_type = type
+        self.type = type.get()
+
+    def __str__(self):
+        return frombytes(self.type.ToString())
+
+    def __repr__(self):
+        return 'DataType({0})'.format(str(self))
+
+    def __richcmp__(DataType self, DataType other, int op):
+        """Support ``==`` and ``!=`` only; other comparisons raise TypeError.
+
+        None never equals a DataType.
+        """
+        cdef bint equal
+
+        if op != cpython.Py_EQ and op != cpython.Py_NE:
+            raise TypeError('Invalid comparison')
+
+        # None passes the DataType argument check but has no C++ type behind
+        # it, so it must not reach other.type.
+        if other is None:
+            equal = False
+        else:
+            equal = self.type.Equals(other.type)
+
+        if op == cpython.Py_EQ:
+            return equal
+        return not equal
+
+
+cdef class Field:
+    """A named field with a data type, backed by a C++ field object.
+
+    Parameters
+    ----------
+    name : passed through ``tobytes`` to form the C++ field name
+    type : DataType, must not be None
+    """
+
+    # `not None` makes Cython raise TypeError for None instead of letting
+    # type.sp_type be read from the None object.
+    def __cinit__(self, object name, DataType type not None):
+        self.type = type
+        self.sp_field.reset(new CField(tobytes(name), type.sp_type))
+        self.field = self.sp_field.get()
+
+    def __repr__(self):
+        return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
+
+    property name:
+
+        def __get__(self):
+            # Read from the C++ field and converted with frombytes.
+            return frombytes(self.field.name)
+
+# Cache of DataType objects keyed by (type id, nullable).
+cdef dict _type_cache = {}
+
+cdef DataType primitive_type(LogicalType type, bint nullable=True):
+    """Return the cached DataType for ``(type, nullable)``.
+
+    The wrapper is built from ``pyarrow.GetPrimitiveType`` on first request
+    and the same object is returned on every later request.
+    """
+    cdef DataType out
+
+    key = (type, nullable)
+    if key not in _type_cache:
+        out = DataType()
+        out.init(pyarrow.GetPrimitiveType(type, nullable))
+        _type_cache[key] = out
+
+    return _type_cache[key]
+
+#------------------------------------------------------------
+# Type factory functions
+
+# Factory for Field: forwards `name` and `type` to the Field constructor.
+def field(name, type):
+ return Field(name, type)
+
+# Type ids treated as primitive: null, boolean, integers and floats.
+cdef set PRIMITIVE_TYPES = {
+    LogicalType_NA,
+    LogicalType_BOOL,
+    LogicalType_UINT8,
+    LogicalType_INT8,
+    LogicalType_UINT16,
+    LogicalType_INT16,
+    LogicalType_UINT32,
+    LogicalType_INT32,
+    LogicalType_UINT64,
+    LogicalType_INT64,
+    LogicalType_FLOAT,
+    LogicalType_DOUBLE,
+}
+
+# Python-visible factories for the primitive types. Each one forwards its
+# type id, and `nullable` where accepted, to primitive_type(), which returns
+# a cached DataType.
+
+# The null type takes no argument and uses primitive_type's default
+# nullable=True.
+def null():
+ return primitive_type(LogicalType_NA)
+
+def bool_(c_bool nullable=True):
+ return primitive_type(LogicalType_BOOL, nullable)
+
+# Integer types, unsigned and signed, 8 to 64 bits.
+def uint8(c_bool nullable=True):
+ return primitive_type(LogicalType_UINT8, nullable)
+
+def int8(c_bool nullable=True):
+ return primitive_type(LogicalType_INT8, nullable)
+
+def uint16(c_bool nullable=True):
+ return primitive_type(LogicalType_UINT16, nullable)
+
+def int16(c_bool nullable=True):
+ return primitive_type(LogicalType_INT16, nullable)
+
+def uint32(c_bool nullable=True):
+ return primitive_type(LogicalType_UINT32, nullable)
+
+def int32(c_bool nullable=True):
+ return primitive_type(LogicalType_INT32, nullable)
+
+def uint64(c_bool nullable=True):
+ return primitive_type(LogicalType_UINT64, nullable)
+
+def int64(c_bool nullable=True):
+ return primitive_type(LogicalType_INT64, nullable)
+
+# Floating-point types.
+def float_(c_bool nullable=True):
+ return primitive_type(LogicalType_FLOAT, nullable)
+
+def double(c_bool nullable=True):
+ return primitive_type(LogicalType_DOUBLE, nullable)
+
+# STRING is not listed in PRIMITIVE_TYPES but is still created through
+# primitive_type().
+def string(c_bool nullable=True):
+ """
+ UTF8 string
+ """
+ return primitive_type(LogicalType_STRING, nullable)
+
+# `not None` makes Cython raise TypeError for None instead of letting
+# value_type.sp_type be read from the None object.
+def list_(DataType value_type not None, c_bool nullable=True):
+    """
+    Create a list type whose elements have type ``value_type``.
+
+    A new DataType is built on every call; list types are not cached.
+    """
+    cdef DataType out = DataType()
+    out.init(shared_ptr[CDataType](
+        new CListType(value_type.sp_type, nullable)))
+    return out
+
+def struct(fields, c_bool nullable=True):
+    """
+    Create a struct type from an iterable of Field objects.
+
+    Raises TypeError if an element of ``fields`` is None or is not a Field.
+    A new DataType is built on every call; struct types are not cached.
+    """
+    cdef:
+        DataType out = DataType()
+        Field field
+        vector[shared_ptr[CField]] c_fields
+
+    for field in fields:
+        # None is accepted by the typed loop variable but has no C++ field
+        # behind it, so it must not reach field.sp_field.
+        if field is None:
+            raise TypeError('struct fields must be Field objects, not None')
+        c_fields.push_back(field.sp_field)
+
+    out.init(shared_ptr[CDataType](
+        new CStructType(c_fields, nullable)))
+    return out
+
+
+# Wraps an existing C++ data type pointer in a fresh DataType object; the
+# result is not stored in _type_cache.
+cdef DataType box_data_type(const shared_ptr[CDataType]& type):
+ cdef DataType out = DataType()
+ out.init(type)
+ return out
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/__init__.py b/python/pyarrow/tests/__init__.py
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_array.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
new file mode 100644
index 0000000..034c157
--- /dev/null
+++ b/python/pyarrow/tests/test_array.py
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow
+import pyarrow.formatting as fmt
+
+
+# Tests for element access and text formatting of arrays built with
+# pyarrow.from_pylist.
+class TestArrayAPI(unittest.TestCase):
+
+ # Indexing a null slot returns the NA singleton itself.
+ def test_getitem_NA(self):
+ arr = pyarrow.from_pylist([1, None, 2])
+ assert arr[1] is pyarrow.NA
+
+ # A list array is rendered one slot per line, nulls shown as NA.
+ def test_list_format(self):
+ arr = pyarrow.from_pylist([[1], None, [2, 3]])
+ result = fmt.array_format(arr)
+ expected = """\
+[
+ [1],
+ NA,
+ [2,
+ 3]
+]"""
+ assert result == expected
+
+ # Strings are rendered quoted, nulls shown as NA.
+ def test_string_format(self):
+ arr = pyarrow.from_pylist(['foo', None, 'bar'])
+ result = fmt.array_format(arr)
+ expected = """\
+[
+ 'foo',
+ NA,
+ 'bar'
+]"""
+ assert result == expected
+
+ # With window=2 only the first two and last two elements are shown,
+ # separated by an ellipsis line.
+ def test_long_array_format(self):
+ arr = pyarrow.from_pylist(range(100))
+ result = fmt.array_format(arr, window=2)
+ expected = """\
+[
+ 0,
+ 1,
+ ...
+ 98,
+ 99
+]"""
+ assert result == expected
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
new file mode 100644
index 0000000..25f6969
--- /dev/null
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow
+
+
+class TestConvertList(unittest.TestCase):
+
+ def test_boolean(self):
+ pass
+
+ def test_empty_list(self):
+ arr = pyarrow.from_pylist([])
+ assert len(arr) == 0
+ assert arr.null_count == 0
+ assert arr.type == pyarrow.null()
+
+ def test_all_none(self):
+ arr = pyarrow.from_pylist([None, None])
+ assert len(arr) == 2
+ assert arr.null_count == 2
+ assert arr.type == pyarrow.null()
+
+ def test_integer(self):
+ arr = pyarrow.from_pylist([1, None, 3, None])
+ assert len(arr) == 4
+ assert arr.null_count == 2
+ assert arr.type == pyarrow.int64()
+
+ def test_garbage_collection(self):
+ import gc
+ bytes_before = pyarrow.total_allocated_bytes()
+ pyarrow.from_pylist([1, None, 3, None])
+ gc.collect()
+ assert pyarrow.total_allocated_bytes() == bytes_before
+
+ def test_double(self):
+ data = [1.5, 1, None, 2.5, None, None]
+ arr = pyarrow.from_pylist(data)
+ assert len(arr) == 6
+ assert arr.null_count == 3
+ assert arr.type == pyarrow.double()
+
+ def test_string(self):
+ data = ['foo', b'bar', None, 'arrow']
+ arr = pyarrow.from_pylist(data)
+ assert len(arr) == 4
+ assert arr.null_count == 1
+ assert arr.type == pyarrow.string()
+
+ def test_mixed_nesting_levels(self):
+ pyarrow.from_pylist([1, 2, None])
+ pyarrow.from_pylist([[1], [2], None])
+ pyarrow.from_pylist([[1], [2], [None]])
+
+ with self.assertRaises(pyarrow.ArrowException):
+ pyarrow.from_pylist([1, 2, [1]])
+
+ with self.assertRaises(pyarrow.ArrowException):
+ pyarrow.from_pylist([1, 2, []])
+
+ with self.assertRaises(pyarrow.ArrowException):
+ pyarrow.from_pylist([[1], [2], [None, [1]]])
+
+ def test_list_of_int(self):
+ data = [[1, 2, 3], [], None, [1, 2]]
+ arr = pyarrow.from_pylist(data)
+ assert len(arr) == 4
+ assert arr.null_count == 1
+ assert arr.type == pyarrow.list_(pyarrow.int64())
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
new file mode 100644
index 0000000..021737d
--- /dev/null
+++ b/python/pyarrow/tests/test_scalars.py
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest, u
+import pyarrow as arrow
+
+
+class TestScalars(unittest.TestCase):
+
+ def test_null_singleton(self):
+ with self.assertRaises(Exception):
+ arrow.NAType()
+
+ def test_bool(self):
+ pass
+
+ def test_int64(self):
+ arr = arrow.from_pylist([1, 2, None])
+
+ v = arr[0]
+ assert isinstance(v, arrow.Int64Value)
+ assert repr(v) == "1"
+ assert v.as_py() == 1
+
+ assert arr[2] is arrow.NA
+
+ def test_double(self):
+ arr = arrow.from_pylist([1.5, None, 3])
+
+ v = arr[0]
+ assert isinstance(v, arrow.DoubleValue)
+ assert repr(v) == "1.5"
+ assert v.as_py() == 1.5
+
+ assert arr[1] is arrow.NA
+
+ v = arr[2]
+ assert v.as_py() == 3.0
+
+ def test_string(self):
+ arr = arrow.from_pylist(['foo', None, u('bar')])
+
+ v = arr[0]
+ assert isinstance(v, arrow.StringValue)
+ assert repr(v) == "'foo'"
+ assert v.as_py() == 'foo'
+
+ assert arr[1] is arrow.NA
+
+ v = arr[2].as_py()
+ assert v == 'bar'
+ assert isinstance(v, str)
+
+ def test_list(self):
+ arr = arrow.from_pylist([['foo', None], None, ['bar'], []])
+
+ v = arr[0]
+ assert len(v) == 2
+ assert isinstance(v, arrow.ListValue)
+ assert repr(v) == "['foo', None]"
+ assert v.as_py() == ['foo', None]
+ assert v[0].as_py() == 'foo'
+ assert v[1] is arrow.NA
+
+ assert arr[1] is arrow.NA
+
+ v = arr[3]
+ assert len(v) == 0
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
new file mode 100644
index 0000000..0235526
--- /dev/null
+++ b/python/pyarrow/tests/test_schema.py
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow as arrow
+
+
+class TestTypes(unittest.TestCase):
+
+ def test_integers(self):
+ dtypes = ['int8', 'int16', 'int32', 'int64',
+ 'uint8', 'uint16', 'uint32', 'uint64']
+
+ for name in dtypes:
+ factory = getattr(arrow, name)
+ t = factory()
+ t_required = factory(False)
+
+ assert str(t) == name
+ assert str(t_required) == '{0} not null'.format(name)
+
+ def test_list(self):
+ value_type = arrow.int32()
+ list_type = arrow.list_(value_type)
+ assert str(list_type) == 'list<int32>'
+
+ def test_string(self):
+ t = arrow.string()
+ assert str(t) == 'string'
+
+ def test_field(self):
+ t = arrow.string()
+ f = arrow.field('foo', t)
+
+ assert f.name == 'foo'
+ assert f.type is t
+ assert repr(f) == "Field('foo', type=string)"
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/requirements.txt
----------------------------------------------------------------------
diff --git a/python/requirements.txt b/python/requirements.txt
index a82cb20..f42c90c 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,4 +1,3 @@
pytest
numpy>=1.7.0
-pandas>=0.12.0
six
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index eb3ff2a..5cc871a 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -27,7 +27,7 @@ import Cython
import sys
import pkg_resources
-from setuptools import setup
+from setuptools import setup, Extension
import os
@@ -40,10 +40,12 @@ from distutils import sysconfig
is_64_bit = sys.maxsize > 2**32
# Check if this is a debug build of Python.
-if hasattr(sys, 'gettotalrefcount'):
- build_type = 'Debug'
-else:
- build_type = 'Release'
+# if hasattr(sys, 'gettotalrefcount'):
+# build_type = 'Debug'
+# else:
+# build_type = 'Release'
+
+build_type = 'Debug'
if Cython.__version__ < '0.19.1':
raise Exception('Please upgrade to Cython 0.19.1 or newer')
@@ -51,7 +53,7 @@ if Cython.__version__ < '0.19.1':
MAJOR = 0
MINOR = 1
MICRO = 0
-VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
+VERSION = '%d.%d.%ddev' % (MAJOR, MINOR, MICRO)
class clean(_clean):
@@ -70,6 +72,9 @@ class build_ext(_build_ext):
def build_extensions(self):
numpy_incl = pkg_resources.resource_filename('numpy', 'core/include')
+ self.extensions = [ext for ext in self.extensions
+ if ext.name != '__dummy__']
+
for ext in self.extensions:
if (hasattr(ext, 'include_dirs') and
numpy_incl not in ext.include_dirs):
@@ -98,6 +103,7 @@ class build_ext(_build_ext):
# The staging directory for the module being built
build_temp = pjoin(os.getcwd(), self.build_temp)
+ build_lib = os.path.join(os.getcwd(), self.build_lib)
# Change to the build directory
saved_cwd = os.getcwd()
@@ -124,7 +130,7 @@ class build_ext(_build_ext):
static_lib_option, source]
self.spawn(cmake_command)
- args = ['make']
+ args = ['make', 'VERBOSE=1']
if 'PYARROW_PARALLEL' in os.environ:
args.append('-j{0}'.format(os.environ['PYARROW_PARALLEL']))
self.spawn(args)
@@ -150,21 +156,19 @@ class build_ext(_build_ext):
if self.inplace:
# a bit hacky
build_lib = saved_cwd
- else:
- build_lib = pjoin(os.getcwd(), self.build_lib)
# Move the built libpyarrow library to the place expected by the Python
# build
if sys.platform != 'win32':
name, = glob.glob('libpyarrow.*')
try:
- os.makedirs(pjoin(build_lib, 'arrow'))
+ os.makedirs(pjoin(build_lib, 'pyarrow'))
except OSError:
pass
- shutil.move(name, pjoin(build_lib, 'arrow', name))
+ shutil.move(name, pjoin(build_lib, 'pyarrow', name))
else:
shutil.move(pjoin(build_type, 'pyarrow.dll'),
- pjoin(build_lib, 'arrow', 'pyarrow.dll'))
+ pjoin(build_lib, 'pyarrow', 'pyarrow.dll'))
# Move the built C-extension to the place expected by the Python build
self._found_names = []
@@ -192,7 +196,7 @@ class build_ext(_build_ext):
def _get_cmake_ext_path(self, name):
# Get the package directory from build_py
build_py = self.get_finalized_command('build_py')
- package_dir = build_py.get_package_dir('arrow')
+ package_dir = build_py.get_package_dir('pyarrow')
# This is the name of the arrow C-extension
suffix = sysconfig.get_config_var('EXT_SUFFIX')
if suffix is None:
@@ -217,23 +221,23 @@ class build_ext(_build_ext):
def get_outputs(self):
# Just the C extensions
- cmake_exts = [self._get_cmake_ext_path(name)
- for name in self.get_names()]
- regular_exts = _build_ext.get_outputs(self)
- return regular_exts + cmake_exts
+ # regular_exts = _build_ext.get_outputs(self)
+ return [self._get_cmake_ext_path(name)
+ for name in self.get_names()]
-extensions = []
-
DESC = """\
Python library for Apache Arrow"""
setup(
- name="arrow",
- packages=['arrow', 'arrow.tests'],
+ name="pyarrow",
+ packages=['pyarrow', 'pyarrow.tests'],
version=VERSION,
- package_data={'arrow': ['*.pxd', '*.pyx']},
- ext_modules=extensions,
+ zip_safe=False,
+ package_data={'pyarrow': ['*.pxd', '*.pyx']},
+ # Dummy extension to trigger build_ext
+ ext_modules=[Extension('__dummy__', sources=[])],
+
cmdclass={
'clean': clean,
'build_ext': build_ext
@@ -243,5 +247,5 @@ setup(
license='Apache License, Version 2.0',
maintainer="Apache Arrow Developers",
maintainer_email="dev@arrow.apache.org",
- test_suite="arrow.tests"
+ test_suite="pyarrow.tests"
)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/src/pyarrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/util/CMakeLists.txt b/python/src/pyarrow/util/CMakeLists.txt
index 3fd8bac..4afb4d0 100644
--- a/python/src/pyarrow/util/CMakeLists.txt
+++ b/python/src/pyarrow/util/CMakeLists.txt
@@ -19,19 +19,21 @@
# pyarrow_test_main
#######################################
-add_library(pyarrow_test_main
- test_main.cc)
+if (PYARROW_BUILD_TESTS)
+ add_library(pyarrow_test_main
+ test_main.cc)
-if (APPLE)
- target_link_libraries(pyarrow_test_main
- gtest
- dl)
- set_target_properties(pyarrow_test_main
- PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-else()
- target_link_libraries(pyarrow_test_main
- gtest
- pthread
- dl
- )
+ if (APPLE)
+ target_link_libraries(pyarrow_test_main
+ gtest
+ dl)
+ set_target_properties(pyarrow_test_main
+ PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+ else()
+ target_link_libraries(pyarrow_test_main
+ gtest
+ pthread
+ dl
+ )
+ endif()
endif()
[2/2] arrow git commit: ARROW-54: [Python] Rename package to "pyarrow"
Posted by we...@apache.org.
ARROW-54: [Python] Rename package to "pyarrow"
Also fixed rpath issues (at great cost) per ARROW-53
Author: Wes McKinney <we...@apache.org>
Closes #23 from wesm/ARROW-54 and squashes the following commits:
b8ce0e8 [Wes McKinney] Update installation instructions
cae9b39 [Wes McKinney] Fix rpath issues per ARROW-53
7554539 [Wes McKinney] Twiddle rpath stuff, remove empty arrow_test_util module
8cca41a [Wes McKinney] Fix Travis CI script for renamed package
1d37c93 [Wes McKinney] Opt in to building unit tests
60088d0 [Wes McKinney] Rename package to pyarrow
e3d0caf [Wes McKinney] Note on other Python interpreters
80d3bac [Wes McKinney] Start installation document
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/6fdcd494
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/6fdcd494
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/6fdcd494
Branch: refs/heads/master
Commit: 6fdcd4943ff9a8cc66afbee380217cec40c0cda0
Parents: 8367527
Author: Wes McKinney <we...@apache.org>
Authored: Wed Mar 9 15:45:05 2016 -0800
Committer: Wes McKinney <we...@apache.org>
Committed: Wed Mar 9 15:45:05 2016 -0800
----------------------------------------------------------------------
.travis.yml | 4 +-
ci/travis_script_python.sh | 2 +-
cpp/CMakeLists.txt | 29 ++--
cpp/src/arrow/CMakeLists.txt | 2 +-
cpp/src/arrow/util/CMakeLists.txt | 44 ++---
python/CMakeLists.txt | 31 ++--
python/arrow/__init__.py | 38 -----
python/arrow/array.pxd | 85 ----------
python/arrow/array.pyx | 192 ---------------------
python/arrow/compat.py | 92 ----------
python/arrow/config.pyx | 8 -
python/arrow/error.pxd | 20 ---
python/arrow/error.pyx | 30 ----
python/arrow/formatting.py | 56 ------
python/arrow/includes/__init__.pxd | 0
python/arrow/includes/arrow.pxd | 124 --------------
python/arrow/includes/common.pxd | 36 ----
python/arrow/includes/parquet.pxd | 51 ------
python/arrow/includes/pyarrow.pxd | 45 -----
python/arrow/parquet.pyx | 23 ---
python/arrow/scalar.pxd | 66 --------
python/arrow/scalar.pyx | 198 ----------------------
python/arrow/schema.pxd | 41 -----
python/arrow/schema.pyx | 164 ------------------
python/arrow/tests/__init__.py | 0
python/arrow/tests/test_array.py | 63 -------
python/arrow/tests/test_convert_builtin.py | 85 ----------
python/arrow/tests/test_scalars.py | 82 ---------
python/arrow/tests/test_schema.py | 51 ------
python/doc/INSTALL.md | 87 ++++++++++
python/pyarrow/__init__.py | 38 +++++
python/pyarrow/array.pxd | 85 ++++++++++
python/pyarrow/array.pyx | 192 +++++++++++++++++++++
python/pyarrow/compat.py | 92 ++++++++++
python/pyarrow/config.pyx | 8 +
python/pyarrow/error.pxd | 20 +++
python/pyarrow/error.pyx | 29 ++++
python/pyarrow/formatting.py | 56 ++++++
python/pyarrow/includes/__init__.pxd | 0
python/pyarrow/includes/common.pxd | 36 ++++
python/pyarrow/includes/libarrow.pxd | 124 ++++++++++++++
python/pyarrow/includes/parquet.pxd | 51 ++++++
python/pyarrow/includes/pyarrow.pxd | 45 +++++
python/pyarrow/parquet.pyx | 23 +++
python/pyarrow/scalar.pxd | 66 ++++++++
python/pyarrow/scalar.pyx | 198 ++++++++++++++++++++++
python/pyarrow/schema.pxd | 41 +++++
python/pyarrow/schema.pyx | 164 ++++++++++++++++++
python/pyarrow/tests/__init__.py | 0
python/pyarrow/tests/test_array.py | 63 +++++++
python/pyarrow/tests/test_convert_builtin.py | 85 ++++++++++
python/pyarrow/tests/test_scalars.py | 82 +++++++++
python/pyarrow/tests/test_schema.py | 51 ++++++
python/requirements.txt | 1 -
python/setup.py | 52 +++---
python/src/pyarrow/util/CMakeLists.txt | 30 ++--
56 files changed, 1740 insertions(+), 1641 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 9e858d7..49a956e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -27,7 +27,8 @@ matrix:
- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh
- compiler: clang
- language: cpp
+ language: objective-c
+ osx_image: xcode6.4
os: osx
addons:
before_script:
@@ -40,7 +41,6 @@ before_install:
- ulimit -c unlimited -S
- export CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build
- export ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install
-- export LD_LIBRARY_PATH=$ARROW_CPP_INSTALL/lib:$LD_LIBRARY_PATH
after_script:
- rm -rf $CPP_BUILD_DIR
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/ci/travis_script_python.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 9b0bd4f..14d66b4 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -48,7 +48,7 @@ export ARROW_HOME=$ARROW_CPP_INSTALL
python setup.py build_ext --inplace
-py.test -vv -r sxX arrow
+py.test -vv -r sxX pyarrow
# if [ $TRAVIS_OS_NAME == "linux" ]; then
# valgrind --tool=memcheck py.test -vv -r sxX arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index e8cb88c..f5f6038 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -37,11 +37,6 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()
-if(APPLE)
- # In newer versions of CMake, this is the default setting
- set(CMAKE_MACOSX_RPATH 1)
-endif()
-
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
@@ -339,10 +334,13 @@ endfunction()
if ("$ENV{GTEST_HOME}" STREQUAL "")
set(GTest_HOME ${THIRDPARTY_DIR}/googletest-release-1.7.0)
endif()
-find_package(GTest REQUIRED)
-include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
-ADD_THIRDPARTY_LIB(gtest
- STATIC_LIB ${GTEST_STATIC_LIB})
+
+if(ARROW_BUILD_TESTS)
+ find_package(GTest REQUIRED)
+ include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
+ ADD_THIRDPARTY_LIB(gtest
+ STATIC_LIB ${GTEST_STATIC_LIB})
+endif()
## Google PerfTools
##
@@ -366,7 +364,7 @@ ADD_THIRDPARTY_LIB(gtest
############################################################
# Linker setup
############################################################
-set(ARROW_MIN_TEST_LIBS arrow arrow_test_main arrow_test_util ${ARROW_BASE_LIBS})
+set(ARROW_MIN_TEST_LIBS arrow arrow_test_main ${ARROW_BASE_LIBS})
set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
############################################################
@@ -461,9 +459,18 @@ add_library(arrow
${LIBARROW_LINKAGE}
${ARROW_SRCS}
)
+
+if (APPLE)
+ set_target_properties(arrow
+ PROPERTIES
+ BUILD_WITH_INSTALL_RPATH ON
+ INSTALL_NAME_DIR "@rpath")
+endif()
+
set_target_properties(arrow
PROPERTIES
- LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+ LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}"
+)
target_link_libraries(arrow ${LIBARROW_LINK_LIBS})
add_subdirectory(src/arrow)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 77326ce..73e6a9b 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -27,6 +27,6 @@ install(FILES
# Unit tests
#######################################
-set(ARROW_TEST_LINK_LIBS arrow_test_util ${ARROW_MIN_TEST_LIBS})
+set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
ADD_ARROW_TEST(array-test)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 4272ce4..d8e2f98 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -29,36 +29,26 @@ install(FILES
DESTINATION include/arrow/util)
#######################################
-# arrow_test_util
-#######################################
-
-add_library(arrow_test_util)
-target_link_libraries(arrow_test_util
-)
-
-SET_TARGET_PROPERTIES(arrow_test_util PROPERTIES LINKER_LANGUAGE CXX)
-
-#######################################
# arrow_test_main
#######################################
-add_library(arrow_test_main
- test_main.cc)
-
-if (APPLE)
- target_link_libraries(arrow_test_main
- gtest
- arrow_test_util
- dl)
- set_target_properties(arrow_test_main
- PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-else()
- target_link_libraries(arrow_test_main
- gtest
- arrow_test_util
- pthread
- dl
- )
+if (ARROW_BUILD_TESTS)
+ add_library(arrow_test_main
+ test_main.cc)
+
+ if (APPLE)
+ target_link_libraries(arrow_test_main
+ gtest
+ dl)
+ set_target_properties(arrow_test_main
+ PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+ else()
+ target_link_libraries(arrow_test_main
+ gtest
+ pthread
+ dl
+ )
+ endif()
endif()
ADD_ARROW_TEST(bit-util-test)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 8f5c27b..0ecafc7 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -45,6 +45,13 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()
+# Top level cmake dir
+if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
+ option(PYARROW_BUILD_TESTS
+ "Build the PyArrow C++ googletest unit tests"
+ OFF)
+endif()
+
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
@@ -322,10 +329,12 @@ function(ADD_THIRDPARTY_LIB LIB_NAME)
endfunction()
## GMock
-find_package(GTest REQUIRED)
-include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
-ADD_THIRDPARTY_LIB(gtest
- STATIC_LIB ${GTEST_STATIC_LIB})
+if (PYARROW_BUILD_TESTS)
+ find_package(GTest REQUIRED)
+ include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
+ ADD_THIRDPARTY_LIB(gtest
+ STATIC_LIB ${GTEST_STATIC_LIB})
+endif()
## Arrow
find_package(Arrow REQUIRED)
@@ -391,6 +400,10 @@ endif (UNIX)
# Subdirectories
############################################################
+if (UNIX)
+ set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
+endif()
+
add_subdirectory(src/pyarrow)
add_subdirectory(src/pyarrow/util)
@@ -407,10 +420,11 @@ set(LINK_LIBS
arrow
)
+SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+
add_library(pyarrow SHARED
${PYARROW_SRCS})
target_link_libraries(pyarrow ${LINK_LIBS})
-set_target_properties(pyarrow PROPERTIES LINKER_LANGUAGE CXX)
if(APPLE)
set_target_properties(pyarrow PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
@@ -420,9 +434,6 @@ endif()
# Setup and build Cython modules
############################################################
-set(USE_RELATIVE_RPATH ON)
-set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
-
set(CYTHON_EXTENSIONS
array
config
@@ -437,7 +448,7 @@ foreach(module ${CYTHON_EXTENSIONS})
list(REMOVE_AT directories -1)
string(REPLACE "." "/" module_root "${module}")
- set(module_SRC arrow/${module_root}.pyx)
+ set(module_SRC pyarrow/${module_root}.pyx)
set_source_files_properties(${module_SRC} PROPERTIES CYTHON_IS_CXX 1)
cython_add_module(${module_name}
@@ -463,7 +474,7 @@ foreach(module ${CYTHON_EXTENSIONS})
endwhile(${i} GREATER 0)
# for inplace development for now
- set(module_install_rpath "${CMAKE_SOURCE_DIR}/arrow/")
+ #set(module_install_rpath "${CMAKE_SOURCE_DIR}/pyarrow/")
set_target_properties(${module_name} PROPERTIES
INSTALL_RPATH ${module_install_rpath})
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/arrow/__init__.py b/python/arrow/__init__.py
deleted file mode 100644
index 3507ea0..0000000
--- a/python/arrow/__init__.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# flake8: noqa
-
-from arrow.array import (Array, from_pylist, total_allocated_bytes,
- BooleanArray, NumericArray,
- Int8Array, UInt8Array,
- ListArray, StringArray)
-
-from arrow.error import ArrowException
-
-from arrow.scalar import (ArrayValue, Scalar, NA, NAType,
- BooleanValue,
- Int8Value, Int16Value, Int32Value, Int64Value,
- UInt8Value, UInt16Value, UInt32Value, UInt64Value,
- FloatValue, DoubleValue, ListValue, StringValue)
-
-from arrow.schema import (null, bool_,
- int8, int16, int32, int64,
- uint8, uint16, uint32, uint64,
- float_, double, string,
- list_, struct, field,
- DataType, Field, Schema)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/array.pxd b/python/arrow/array.pxd
deleted file mode 100644
index 482f8f7..0000000
--- a/python/arrow/array.pxd
+++ /dev/null
@@ -1,85 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.common cimport shared_ptr
-from arrow.includes.arrow cimport CArray, LogicalType
-
-from arrow.scalar import NA
-
-from arrow.schema cimport DataType
-
-cdef extern from "Python.h":
- int PySlice_Check(object)
-
-cdef class Array:
- cdef:
- shared_ptr[CArray] sp_array
- CArray* ap
-
- cdef readonly:
- DataType type
-
- cdef init(self, const shared_ptr[CArray]& sp_array)
- cdef getitem(self, int i)
-
-
-cdef class BooleanArray(Array):
- pass
-
-
-cdef class NumericArray(Array):
- pass
-
-
-cdef class Int8Array(NumericArray):
- pass
-
-
-cdef class UInt8Array(NumericArray):
- pass
-
-
-cdef class Int16Array(NumericArray):
- pass
-
-
-cdef class UInt16Array(NumericArray):
- pass
-
-
-cdef class Int32Array(NumericArray):
- pass
-
-
-cdef class UInt32Array(NumericArray):
- pass
-
-
-cdef class Int64Array(NumericArray):
- pass
-
-
-cdef class UInt64Array(NumericArray):
- pass
-
-
-cdef class ListArray(Array):
- pass
-
-
-cdef class StringArray(Array):
- pass
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/array.pyx b/python/arrow/array.pyx
deleted file mode 100644
index b367e3b..0000000
--- a/python/arrow/array.pyx
+++ /dev/null
@@ -1,192 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from arrow.includes.arrow cimport *
-cimport arrow.includes.pyarrow as pyarrow
-
-from arrow.compat import frombytes, tobytes
-from arrow.error cimport check_status
-
-cimport arrow.scalar as scalar
-from arrow.scalar import NA
-
-def total_allocated_bytes():
- cdef MemoryPool* pool = pyarrow.GetMemoryPool()
- return pool.bytes_allocated()
-
-
-cdef class Array:
-
- cdef init(self, const shared_ptr[CArray]& sp_array):
- self.sp_array = sp_array
- self.ap = sp_array.get()
- self.type = DataType()
- self.type.init(self.sp_array.get().type())
-
- property null_count:
-
- def __get__(self):
- return self.sp_array.get().null_count()
-
- def __iter__(self):
- for i in range(len(self)):
- yield self.getitem(i)
- raise StopIteration
-
- def __repr__(self):
- from arrow.formatting import array_format
- type_format = object.__repr__(self)
- values = array_format(self, window=10)
- return '{0}\n{1}'.format(type_format, values)
-
- def __len__(self):
- return self.sp_array.get().length()
-
- def isnull(self):
- raise NotImplemented
-
- def __getitem__(self, key):
- cdef:
- Py_ssize_t n = len(self)
-
- if PySlice_Check(key):
- start = key.start or 0
- while start < 0:
- start += n
-
- stop = key.stop if key.stop is not None else n
- while stop < 0:
- stop += n
-
- step = key.step or 1
- if step != 1:
- raise NotImplementedError
- else:
- return self.slice(start, stop)
-
- while key < 0:
- key += len(self)
-
- return self.getitem(key)
-
- cdef getitem(self, int i):
- return scalar.box_arrow_scalar(self.type, self.sp_array, i)
-
- def slice(self, start, end):
- pass
-
-
-cdef class NullArray(Array):
- pass
-
-
-cdef class BooleanArray(Array):
- pass
-
-
-cdef class NumericArray(Array):
- pass
-
-
-cdef class Int8Array(NumericArray):
- pass
-
-
-cdef class UInt8Array(NumericArray):
- pass
-
-
-cdef class Int16Array(NumericArray):
- pass
-
-
-cdef class UInt16Array(NumericArray):
- pass
-
-
-cdef class Int32Array(NumericArray):
- pass
-
-
-cdef class UInt32Array(NumericArray):
- pass
-
-
-cdef class Int64Array(NumericArray):
- pass
-
-
-cdef class UInt64Array(NumericArray):
- pass
-
-
-cdef class FloatArray(NumericArray):
- pass
-
-
-cdef class DoubleArray(NumericArray):
- pass
-
-
-cdef class ListArray(Array):
- pass
-
-
-cdef class StringArray(Array):
- pass
-
-
-cdef dict _array_classes = {
- LogicalType_NA: NullArray,
- LogicalType_BOOL: BooleanArray,
- LogicalType_INT64: Int64Array,
- LogicalType_DOUBLE: DoubleArray,
- LogicalType_LIST: ListArray,
- LogicalType_STRING: StringArray,
-}
-
-cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
- if sp_array.get() == NULL:
- raise ValueError('Array was NULL')
-
- cdef CDataType* data_type = sp_array.get().type().get()
-
- if data_type == NULL:
- raise ValueError('Array data type was NULL')
-
- cdef Array arr = _array_classes[data_type.type]()
- arr.init(sp_array)
- return arr
-
-
-def from_pylist(object list_obj, DataType type=None):
- """
- Convert Python list to Arrow array
- """
- cdef:
- shared_ptr[CArray] sp_array
-
- if type is None:
- check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
- else:
- raise NotImplementedError
-
- return box_arrow_array(sp_array)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/compat.py
----------------------------------------------------------------------
diff --git a/python/arrow/compat.py b/python/arrow/compat.py
deleted file mode 100644
index 08f0f23..0000000
--- a/python/arrow/compat.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# flake8: noqa
-
-import itertools
-
-import numpy as np
-
-import sys
-import six
-from six import BytesIO, StringIO, string_types as py_string
-
-
-PY26 = sys.version_info[:2] == (2, 6)
-PY2 = sys.version_info[0] == 2
-
-
-if PY26:
- import unittest2 as unittest
-else:
- import unittest
-
-
-if PY2:
- import cPickle
-
- try:
- from cdecimal import Decimal
- except ImportError:
- from decimal import Decimal
-
- unicode_type = unicode
- lzip = zip
- zip = itertools.izip
-
- def dict_values(x):
- return x.values()
-
- range = xrange
- long = long
-
- def u(s):
- return unicode(s, "unicode_escape")
-
- def tobytes(o):
- if isinstance(o, unicode):
- return o.encode('utf8')
- else:
- return o
-
- def frombytes(o):
- return o
-else:
- unicode_type = str
- def lzip(*x):
- return list(zip(*x))
- long = int
- zip = zip
- def dict_values(x):
- return list(x.values())
- from decimal import Decimal
- range = range
-
- def u(s):
- return s
-
- def tobytes(o):
- if isinstance(o, str):
- return o.encode('utf8')
- else:
- return o
-
- def frombytes(o):
- return o.decode('utf8')
-
-
-integer_types = six.integer_types + (np.integer,)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/config.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/config.pyx b/python/arrow/config.pyx
deleted file mode 100644
index 521bc06..0000000
--- a/python/arrow/config.pyx
+++ /dev/null
@@ -1,8 +0,0 @@
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-cdef extern from 'pyarrow/init.h' namespace 'pyarrow':
- void pyarrow_init()
-
-pyarrow_init()
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/error.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/error.pxd b/python/arrow/error.pxd
deleted file mode 100644
index c18cb3e..0000000
--- a/python/arrow/error.pxd
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.pyarrow cimport *
-
-cdef check_status(const Status& status)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/error.pyx b/python/arrow/error.pyx
deleted file mode 100644
index f1d5163..0000000
--- a/python/arrow/error.pyx
+++ /dev/null
@@ -1,30 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.common cimport c_string
-
-from arrow.compat import frombytes
-
-class ArrowException(Exception):
- pass
-
-cdef check_status(const Status& status):
- if status.ok():
- return
-
- cdef c_string c_message = status.ToString()
- raise ArrowException(frombytes(c_message))
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/formatting.py
----------------------------------------------------------------------
diff --git a/python/arrow/formatting.py b/python/arrow/formatting.py
deleted file mode 100644
index a42d4e4..0000000
--- a/python/arrow/formatting.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Pretty-printing and other formatting utilities for Arrow data structures
-
-import arrow.scalar as scalar
-
-
-def array_format(arr, window=None):
- values = []
-
- if window is None or window * 2 >= len(arr):
- for x in arr:
- values.append(value_format(x, 0))
- contents = _indent(',\n'.join(values), 2)
- else:
- for i in range(window):
- values.append(value_format(arr[i], 0) + ',')
- values.append('...')
- for i in range(len(arr) - window, len(arr)):
- formatted = value_format(arr[i], 0)
- if i < len(arr) - 1:
- formatted += ','
- values.append(formatted)
- contents = _indent('\n'.join(values), 2)
-
- return '[\n{0}\n]'.format(contents)
-
-
-def value_format(x, indent_level=0):
- if isinstance(x, scalar.ListValue):
- contents = ',\n'.join(value_format(item) for item in x)
- return '[{0}]'.format(_indent(contents, 1).strip())
- else:
- return repr(x)
-
-
-def _indent(text, spaces):
- if spaces == 0:
- return text
- block = ' ' * spaces
- return '\n'.join(block + x for x in text.split('\n'))
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/__init__.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/__init__.pxd b/python/arrow/includes/__init__.pxd
deleted file mode 100644
index e69de29..0000000
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/arrow.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/arrow.pxd b/python/arrow/includes/arrow.pxd
deleted file mode 100644
index 0cc44c0..0000000
--- a/python/arrow/includes/arrow.pxd
+++ /dev/null
@@ -1,124 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from arrow.includes.common cimport *
-
-cdef extern from "arrow/api.h" namespace "arrow" nogil:
-
- enum LogicalType" arrow::LogicalType::type":
- LogicalType_NA" arrow::LogicalType::NA"
-
- LogicalType_BOOL" arrow::LogicalType::BOOL"
-
- LogicalType_UINT8" arrow::LogicalType::UINT8"
- LogicalType_INT8" arrow::LogicalType::INT8"
- LogicalType_UINT16" arrow::LogicalType::UINT16"
- LogicalType_INT16" arrow::LogicalType::INT16"
- LogicalType_UINT32" arrow::LogicalType::UINT32"
- LogicalType_INT32" arrow::LogicalType::INT32"
- LogicalType_UINT64" arrow::LogicalType::UINT64"
- LogicalType_INT64" arrow::LogicalType::INT64"
-
- LogicalType_FLOAT" arrow::LogicalType::FLOAT"
- LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
-
- LogicalType_STRING" arrow::LogicalType::STRING"
-
- LogicalType_LIST" arrow::LogicalType::LIST"
- LogicalType_STRUCT" arrow::LogicalType::STRUCT"
-
- cdef cppclass CDataType" arrow::DataType":
- LogicalType type
- c_bool nullable
-
- c_bool Equals(const CDataType* other)
-
- c_string ToString()
-
- cdef cppclass MemoryPool" arrow::MemoryPool":
- int64_t bytes_allocated()
-
- cdef cppclass CListType" arrow::ListType"(CDataType):
- CListType(const shared_ptr[CDataType]& value_type,
- c_bool nullable)
-
- cdef cppclass CStringType" arrow::StringType"(CDataType):
- pass
-
- cdef cppclass CField" arrow::Field":
- c_string name
- shared_ptr[CDataType] type
-
- CField(const c_string& name, const shared_ptr[CDataType]& type)
-
- cdef cppclass CStructType" arrow::StructType"(CDataType):
- CStructType(const vector[shared_ptr[CField]]& fields,
- c_bool nullable)
-
- cdef cppclass CSchema" arrow::Schema":
- CSchema(const shared_ptr[CField]& fields)
-
- cdef cppclass CArray" arrow::Array":
- const shared_ptr[CDataType]& type()
-
- int32_t length()
- int32_t null_count()
- LogicalType logical_type()
-
- c_bool IsNull(int i)
-
- cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
- uint8_t Value(int i)
-
- cdef cppclass CInt8Array" arrow::Int8Array"(CArray):
- int8_t Value(int i)
-
- cdef cppclass CUInt16Array" arrow::UInt16Array"(CArray):
- uint16_t Value(int i)
-
- cdef cppclass CInt16Array" arrow::Int16Array"(CArray):
- int16_t Value(int i)
-
- cdef cppclass CUInt32Array" arrow::UInt32Array"(CArray):
- uint32_t Value(int i)
-
- cdef cppclass CInt32Array" arrow::Int32Array"(CArray):
- int32_t Value(int i)
-
- cdef cppclass CUInt64Array" arrow::UInt64Array"(CArray):
- uint64_t Value(int i)
-
- cdef cppclass CInt64Array" arrow::Int64Array"(CArray):
- int64_t Value(int i)
-
- cdef cppclass CFloatArray" arrow::FloatArray"(CArray):
- float Value(int i)
-
- cdef cppclass CDoubleArray" arrow::DoubleArray"(CArray):
- double Value(int i)
-
- cdef cppclass CListArray" arrow::ListArray"(CArray):
- const int32_t* offsets()
- int32_t offset(int i)
- int32_t value_length(int i)
- const shared_ptr[CArray]& values()
- const shared_ptr[CDataType]& value_type()
-
- cdef cppclass CStringArray" arrow::StringArray"(CListArray):
- c_string GetString(int i)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/common.pxd b/python/arrow/includes/common.pxd
deleted file mode 100644
index 839427a..0000000
--- a/python/arrow/includes/common.pxd
+++ /dev/null
@@ -1,36 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from libc.stdint cimport *
-from libcpp cimport bool as c_bool
-from libcpp.string cimport string as c_string
-from libcpp.vector cimport vector
-
-# This must be included for cerr and other things to work
-cdef extern from "<iostream>":
- pass
-
-cdef extern from "<memory>" namespace "std" nogil:
-
- cdef cppclass shared_ptr[T]:
- shared_ptr()
- shared_ptr(T*)
- T* get()
- void reset()
- void reset(T* p)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/parquet.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/parquet.pxd b/python/arrow/includes/parquet.pxd
deleted file mode 100644
index 62342f3..0000000
--- a/python/arrow/includes/parquet.pxd
+++ /dev/null
@@ -1,51 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from arrow.includes.common cimport *
-
-cdef extern from "parquet/api/reader.h" namespace "parquet_cpp" nogil:
- cdef cppclass ColumnReader:
- pass
-
- cdef cppclass BoolReader(ColumnReader):
- pass
-
- cdef cppclass Int32Reader(ColumnReader):
- pass
-
- cdef cppclass Int64Reader(ColumnReader):
- pass
-
- cdef cppclass Int96Reader(ColumnReader):
- pass
-
- cdef cppclass FloatReader(ColumnReader):
- pass
-
- cdef cppclass DoubleReader(ColumnReader):
- pass
-
- cdef cppclass ByteArrayReader(ColumnReader):
- pass
-
- cdef cppclass RowGroupReader:
- pass
-
- cdef cppclass ParquetFileReader:
- pass
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/pyarrow.pxd b/python/arrow/includes/pyarrow.pxd
deleted file mode 100644
index 3eed5b8..0000000
--- a/python/arrow/includes/pyarrow.pxd
+++ /dev/null
@@ -1,45 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from arrow.includes.common cimport *
-from arrow.includes.arrow cimport (CArray, CDataType, LogicalType,
- MemoryPool)
-
-cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
- # We can later add more of the common status factory methods as needed
- cdef Status Status_OK "Status::OK"()
-
- cdef cppclass Status:
- Status()
-
- c_string ToString()
-
- c_bool ok()
- c_bool IsOutOfMemory()
- c_bool IsKeyError()
- c_bool IsTypeError()
- c_bool IsIOError()
- c_bool IsValueError()
- c_bool IsNotImplemented()
- c_bool IsArrowError()
-
- shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
- Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
-
- MemoryPool* GetMemoryPool()
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/parquet.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/parquet.pyx b/python/arrow/parquet.pyx
deleted file mode 100644
index 23c3838..0000000
--- a/python/arrow/parquet.pyx
+++ /dev/null
@@ -1,23 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from arrow.compat import frombytes, tobytes
-from arrow.includes.parquet cimport *
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/scalar.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/scalar.pxd b/python/arrow/scalar.pxd
deleted file mode 100644
index 4e0a364..0000000
--- a/python/arrow/scalar.pxd
+++ /dev/null
@@ -1,66 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.common cimport *
-from arrow.includes.arrow cimport *
-
-from arrow.schema cimport DataType
-
-cdef class Scalar:
- cdef readonly:
- DataType type
-
-
-cdef class NAType(Scalar):
- pass
-
-
-cdef class ArrayValue(Scalar):
- cdef:
- shared_ptr[CArray] sp_array
- int index
-
- cdef void init(self, DataType type,
- const shared_ptr[CArray]& sp_array, int index)
-
- cdef void _set_array(self, const shared_ptr[CArray]& sp_array)
-
-
-cdef class Int8Value(ArrayValue):
- pass
-
-
-cdef class Int64Value(ArrayValue):
- pass
-
-
-cdef class ListValue(ArrayValue):
- cdef readonly:
- DataType value_type
-
- cdef:
- CListArray* ap
-
- cdef getitem(self, int i)
-
-
-cdef class StringValue(ArrayValue):
- pass
-
-cdef object box_arrow_scalar(DataType type,
- const shared_ptr[CArray]& sp_array,
- int index)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/scalar.pyx b/python/arrow/scalar.pyx
deleted file mode 100644
index 72a280e..0000000
--- a/python/arrow/scalar.pyx
+++ /dev/null
@@ -1,198 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.schema cimport DataType, box_data_type
-
-from arrow.compat import frombytes
-import arrow.schema as schema
-
-NA = None
-
-cdef class NAType(Scalar):
-
- def __cinit__(self):
- global NA
- if NA is not None:
- raise Exception('Cannot create multiple NAType instances')
-
- self.type = schema.null()
-
- def __repr__(self):
- return 'NA'
-
- def as_py(self):
- return None
-
-NA = NAType()
-
-cdef class ArrayValue(Scalar):
-
- cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
- int index):
- self.type = type
- self.index = index
- self._set_array(sp_array)
-
- cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
- self.sp_array = sp_array
-
- def __repr__(self):
- if hasattr(self, 'as_py'):
- return repr(self.as_py())
- else:
- return Scalar.__repr__(self)
-
-
-cdef class BooleanValue(ArrayValue):
- pass
-
-
-cdef class Int8Value(ArrayValue):
-
- def as_py(self):
- cdef CInt8Array* ap = <CInt8Array*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class UInt8Value(ArrayValue):
-
- def as_py(self):
- cdef CUInt8Array* ap = <CUInt8Array*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class Int16Value(ArrayValue):
-
- def as_py(self):
- cdef CInt16Array* ap = <CInt16Array*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class UInt16Value(ArrayValue):
-
- def as_py(self):
- cdef CUInt16Array* ap = <CUInt16Array*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class Int32Value(ArrayValue):
-
- def as_py(self):
- cdef CInt32Array* ap = <CInt32Array*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class UInt32Value(ArrayValue):
-
- def as_py(self):
- cdef CUInt32Array* ap = <CUInt32Array*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class Int64Value(ArrayValue):
-
- def as_py(self):
- cdef CInt64Array* ap = <CInt64Array*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class UInt64Value(ArrayValue):
-
- def as_py(self):
- cdef CUInt64Array* ap = <CUInt64Array*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class FloatValue(ArrayValue):
-
- def as_py(self):
- cdef CFloatArray* ap = <CFloatArray*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class DoubleValue(ArrayValue):
-
- def as_py(self):
- cdef CDoubleArray* ap = <CDoubleArray*> self.sp_array.get()
- return ap.Value(self.index)
-
-
-cdef class StringValue(ArrayValue):
-
- def as_py(self):
- cdef CStringArray* ap = <CStringArray*> self.sp_array.get()
- return frombytes(ap.GetString(self.index))
-
-
-cdef class ListValue(ArrayValue):
-
- def __len__(self):
- return self.ap.value_length(self.index)
-
- def __getitem__(self, i):
- return self.getitem(i)
-
- def __iter__(self):
- for i in range(len(self)):
- yield self.getitem(i)
- raise StopIteration
-
- cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
- self.sp_array = sp_array
- self.ap = <CListArray*> sp_array.get()
- self.value_type = box_data_type(self.ap.value_type())
-
- cdef getitem(self, int i):
- cdef int j = self.ap.offset(self.index) + i
- return box_arrow_scalar(self.value_type, self.ap.values(), j)
-
- def as_py(self):
- cdef:
- int j
- list result = []
-
- for j in range(len(self)):
- result.append(self.getitem(j).as_py())
-
- return result
-
-
-cdef dict _scalar_classes = {
- LogicalType_UINT8: Int8Value,
- LogicalType_UINT16: Int16Value,
- LogicalType_UINT32: Int32Value,
- LogicalType_UINT64: Int64Value,
- LogicalType_INT8: Int8Value,
- LogicalType_INT16: Int16Value,
- LogicalType_INT32: Int32Value,
- LogicalType_INT64: Int64Value,
- LogicalType_FLOAT: FloatValue,
- LogicalType_DOUBLE: DoubleValue,
- LogicalType_LIST: ListValue,
- LogicalType_STRING: StringValue
-}
-
-cdef object box_arrow_scalar(DataType type,
- const shared_ptr[CArray]& sp_array,
- int index):
- cdef ArrayValue val
- if sp_array.get().IsNull(index):
- return NA
- else:
- val = _scalar_classes[type.type.type]()
- val.init(type, sp_array, index)
- return val
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/schema.pxd b/python/arrow/schema.pxd
deleted file mode 100644
index 8cc244a..0000000
--- a/python/arrow/schema.pxd
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.includes.common cimport shared_ptr
-from arrow.includes.arrow cimport CDataType, CField, CSchema
-
-cdef class DataType:
- cdef:
- shared_ptr[CDataType] sp_type
- CDataType* type
-
- cdef init(self, const shared_ptr[CDataType]& type)
-
-cdef class Field:
- cdef:
- shared_ptr[CField] sp_field
- CField* field
-
- cdef readonly:
- DataType type
-
-cdef class Schema:
- cdef:
- shared_ptr[CSchema] sp_schema
- CSchema* schema
-
-cdef DataType box_data_type(const shared_ptr[CDataType]& type)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/schema.pyx b/python/arrow/schema.pyx
deleted file mode 100644
index 3001531..0000000
--- a/python/arrow/schema.pyx
+++ /dev/null
@@ -1,164 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-########################################
-# Data types, fields, schemas, and so forth
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from arrow.compat import frombytes, tobytes
-from arrow.includes.arrow cimport *
-cimport arrow.includes.pyarrow as pyarrow
-
-cimport cpython
-
-cdef class DataType:
-
- def __cinit__(self):
- pass
-
- cdef init(self, const shared_ptr[CDataType]& type):
- self.sp_type = type
- self.type = type.get()
-
- def __str__(self):
- return frombytes(self.type.ToString())
-
- def __repr__(self):
- return 'DataType({0})'.format(str(self))
-
- def __richcmp__(DataType self, DataType other, int op):
- if op == cpython.Py_EQ:
- return self.type.Equals(other.type)
- elif op == cpython.Py_NE:
- return not self.type.Equals(other.type)
- else:
- raise TypeError('Invalid comparison')
-
-
-cdef class Field:
-
- def __cinit__(self, object name, DataType type):
- self.type = type
- self.sp_field.reset(new CField(tobytes(name), type.sp_type))
- self.field = self.sp_field.get()
-
- def __repr__(self):
- return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
-
- property name:
-
- def __get__(self):
- return frombytes(self.field.name)
-
-cdef dict _type_cache = {}
-
-cdef DataType primitive_type(LogicalType type, bint nullable=True):
- if (type, nullable) in _type_cache:
- return _type_cache[type, nullable]
-
- cdef DataType out = DataType()
- out.init(pyarrow.GetPrimitiveType(type, nullable))
-
- _type_cache[type, nullable] = out
- return out
-
-#------------------------------------------------------------
-# Type factory functions
-
-def field(name, type):
- return Field(name, type)
-
-cdef set PRIMITIVE_TYPES = set([
- LogicalType_NA, LogicalType_BOOL,
- LogicalType_UINT8, LogicalType_INT8,
- LogicalType_UINT16, LogicalType_INT16,
- LogicalType_UINT32, LogicalType_INT32,
- LogicalType_UINT64, LogicalType_INT64,
- LogicalType_FLOAT, LogicalType_DOUBLE])
-
-def null():
- return primitive_type(LogicalType_NA)
-
-def bool_(c_bool nullable=True):
- return primitive_type(LogicalType_BOOL, nullable)
-
-def uint8(c_bool nullable=True):
- return primitive_type(LogicalType_UINT8, nullable)
-
-def int8(c_bool nullable=True):
- return primitive_type(LogicalType_INT8, nullable)
-
-def uint16(c_bool nullable=True):
- return primitive_type(LogicalType_UINT16, nullable)
-
-def int16(c_bool nullable=True):
- return primitive_type(LogicalType_INT16, nullable)
-
-def uint32(c_bool nullable=True):
- return primitive_type(LogicalType_UINT32, nullable)
-
-def int32(c_bool nullable=True):
- return primitive_type(LogicalType_INT32, nullable)
-
-def uint64(c_bool nullable=True):
- return primitive_type(LogicalType_UINT64, nullable)
-
-def int64(c_bool nullable=True):
- return primitive_type(LogicalType_INT64, nullable)
-
-def float_(c_bool nullable=True):
- return primitive_type(LogicalType_FLOAT, nullable)
-
-def double(c_bool nullable=True):
- return primitive_type(LogicalType_DOUBLE, nullable)
-
-def string(c_bool nullable=True):
- """
- UTF8 string
- """
- return primitive_type(LogicalType_STRING, nullable)
-
-def list_(DataType value_type, c_bool nullable=True):
- cdef DataType out = DataType()
- out.init(shared_ptr[CDataType](
- new CListType(value_type.sp_type, nullable)))
- return out
-
-def struct(fields, c_bool nullable=True):
- """
-
- """
- cdef:
- DataType out = DataType()
- Field field
- vector[shared_ptr[CField]] c_fields
-
- for field in fields:
- c_fields.push_back(field.sp_field)
-
- out.init(shared_ptr[CDataType](
- new CStructType(c_fields, nullable)))
- return out
-
-
-cdef DataType box_data_type(const shared_ptr[CDataType]& type):
- cdef DataType out = DataType()
- out.init(type)
- return out
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/__init__.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/__init__.py b/python/arrow/tests/__init__.py
deleted file mode 100644
index e69de29..0000000
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/test_array.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_array.py b/python/arrow/tests/test_array.py
deleted file mode 100644
index ebd872c..0000000
--- a/python/arrow/tests/test_array.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.compat import unittest
-import arrow
-import arrow.formatting as fmt
-
-
-class TestArrayAPI(unittest.TestCase):
-
- def test_getitem_NA(self):
- arr = arrow.from_pylist([1, None, 2])
- assert arr[1] is arrow.NA
-
- def test_list_format(self):
- arr = arrow.from_pylist([[1], None, [2, 3]])
- result = fmt.array_format(arr)
- expected = """\
-[
- [1],
- NA,
- [2,
- 3]
-]"""
- assert result == expected
-
- def test_string_format(self):
- arr = arrow.from_pylist(['foo', None, 'bar'])
- result = fmt.array_format(arr)
- expected = """\
-[
- 'foo',
- NA,
- 'bar'
-]"""
- assert result == expected
-
- def test_long_array_format(self):
- arr = arrow.from_pylist(range(100))
- result = fmt.array_format(arr, window=2)
- expected = """\
-[
- 0,
- 1,
- ...
- 98,
- 99
-]"""
- assert result == expected
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_convert_builtin.py b/python/arrow/tests/test_convert_builtin.py
deleted file mode 100644
index 57e6ab9..0000000
--- a/python/arrow/tests/test_convert_builtin.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.compat import unittest
-import arrow
-
-
-class TestConvertList(unittest.TestCase):
-
- def test_boolean(self):
- pass
-
- def test_empty_list(self):
- arr = arrow.from_pylist([])
- assert len(arr) == 0
- assert arr.null_count == 0
- assert arr.type == arrow.null()
-
- def test_all_none(self):
- arr = arrow.from_pylist([None, None])
- assert len(arr) == 2
- assert arr.null_count == 2
- assert arr.type == arrow.null()
-
- def test_integer(self):
- arr = arrow.from_pylist([1, None, 3, None])
- assert len(arr) == 4
- assert arr.null_count == 2
- assert arr.type == arrow.int64()
-
- def test_garbage_collection(self):
- import gc
- bytes_before = arrow.total_allocated_bytes()
- arrow.from_pylist([1, None, 3, None])
- gc.collect()
- assert arrow.total_allocated_bytes() == bytes_before
-
- def test_double(self):
- data = [1.5, 1, None, 2.5, None, None]
- arr = arrow.from_pylist(data)
- assert len(arr) == 6
- assert arr.null_count == 3
- assert arr.type == arrow.double()
-
- def test_string(self):
- data = ['foo', b'bar', None, 'arrow']
- arr = arrow.from_pylist(data)
- assert len(arr) == 4
- assert arr.null_count == 1
- assert arr.type == arrow.string()
-
- def test_mixed_nesting_levels(self):
- arrow.from_pylist([1, 2, None])
- arrow.from_pylist([[1], [2], None])
- arrow.from_pylist([[1], [2], [None]])
-
- with self.assertRaises(arrow.ArrowException):
- arrow.from_pylist([1, 2, [1]])
-
- with self.assertRaises(arrow.ArrowException):
- arrow.from_pylist([1, 2, []])
-
- with self.assertRaises(arrow.ArrowException):
- arrow.from_pylist([[1], [2], [None, [1]]])
-
- def test_list_of_int(self):
- data = [[1, 2, 3], [], None, [1, 2]]
- arr = arrow.from_pylist(data)
- assert len(arr) == 4
- assert arr.null_count == 1
- assert arr.type == arrow.list_(arrow.int64())
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_scalars.py b/python/arrow/tests/test_scalars.py
deleted file mode 100644
index 951380b..0000000
--- a/python/arrow/tests/test_scalars.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.compat import unittest, u
-import arrow
-
-
-class TestScalars(unittest.TestCase):
-
- def test_null_singleton(self):
- with self.assertRaises(Exception):
- arrow.NAType()
-
- def test_bool(self):
- pass
-
- def test_int64(self):
- arr = arrow.from_pylist([1, 2, None])
-
- v = arr[0]
- assert isinstance(v, arrow.Int64Value)
- assert repr(v) == "1"
- assert v.as_py() == 1
-
- assert arr[2] is arrow.NA
-
- def test_double(self):
- arr = arrow.from_pylist([1.5, None, 3])
-
- v = arr[0]
- assert isinstance(v, arrow.DoubleValue)
- assert repr(v) == "1.5"
- assert v.as_py() == 1.5
-
- assert arr[1] is arrow.NA
-
- v = arr[2]
- assert v.as_py() == 3.0
-
- def test_string(self):
- arr = arrow.from_pylist(['foo', None, u('bar')])
-
- v = arr[0]
- assert isinstance(v, arrow.StringValue)
- assert repr(v) == "'foo'"
- assert v.as_py() == 'foo'
-
- assert arr[1] is arrow.NA
-
- v = arr[2].as_py()
- assert v == 'bar'
- assert isinstance(v, str)
-
- def test_list(self):
- arr = arrow.from_pylist([['foo', None], None, ['bar'], []])
-
- v = arr[0]
- assert len(v) == 2
- assert isinstance(v, arrow.ListValue)
- assert repr(v) == "['foo', None]"
- assert v.as_py() == ['foo', None]
- assert v[0].as_py() == 'foo'
- assert v[1] is arrow.NA
-
- assert arr[1] is arrow.NA
-
- v = arr[3]
- assert len(v) == 0
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/arrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_schema.py b/python/arrow/tests/test_schema.py
deleted file mode 100644
index a89edd7..0000000
--- a/python/arrow/tests/test_schema.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from arrow.compat import unittest
-import arrow
-
-
-class TestTypes(unittest.TestCase):
-
- def test_integers(self):
- dtypes = ['int8', 'int16', 'int32', 'int64',
- 'uint8', 'uint16', 'uint32', 'uint64']
-
- for name in dtypes:
- factory = getattr(arrow, name)
- t = factory()
- t_required = factory(False)
-
- assert str(t) == name
- assert str(t_required) == '{0} not null'.format(name)
-
- def test_list(self):
- value_type = arrow.int32()
- list_type = arrow.list_(value_type)
- assert str(list_type) == 'list<int32>'
-
- def test_string(self):
- t = arrow.string()
- assert str(t) == 'string'
-
- def test_field(self):
- t = arrow.string()
- f = arrow.field('foo', t)
-
- assert f.name == 'foo'
- assert f.type is t
- assert repr(f) == "Field('foo', type=string)"
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/doc/INSTALL.md
----------------------------------------------------------------------
diff --git a/python/doc/INSTALL.md b/python/doc/INSTALL.md
new file mode 100644
index 0000000..d30a030
--- /dev/null
+++ b/python/doc/INSTALL.md
@@ -0,0 +1,87 @@
+## Building pyarrow (Apache Arrow Python library)
+
+First, clone the master git repository:
+
+```bash
+git clone https://github.com/apache/arrow.git arrow
+```
+
+#### System requirements
+
+Building pyarrow requires:
+
+* A C++11 compiler
+
+ * Linux: gcc >= 4.8 or clang >= 3.5
+ * OS X: XCode 6.4 or higher preferred
+
+* [cmake][1]
+
+#### Python requirements
+
+You will need Python (CPython) 2.7, 3.4, or 3.5 installed. Earlier releases
+are not being targeted.
+
+> This library targets CPython only due to an emphasis on interoperability with
+> pandas and NumPy, which are only available for CPython.
+
+The build requires NumPy, Cython, and a few other Python dependencies:
+
+```bash
+pip install cython
+cd arrow/python
+pip install -r requirements.txt
+```
+
+#### Installing Arrow C++ library
+
+First, you should choose an installation location for Arrow C++. In the future
+using the default system install location will work, but for now we are being
+explicit:
+
+```bash
+export ARROW_HOME=$HOME/local
+```
+
+Now, we build Arrow:
+
+```bash
+cd arrow/cpp
+
+mkdir dev-build
+cd dev-build
+
+cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME ..
+
+make
+
+# Use sudo here if $ARROW_HOME requires it
+make install
+```
+
+#### Install `pyarrow`
+
+```bash
+cd arrow/python
+
+python setup.py install
+```
+
+> On XCode 6 and prior there are some known OS X `@rpath` issues. If you are
+> unable to import pyarrow, upgrading XCode may be the solution.
+
+
+```python
+In [1]: import pyarrow
+
+In [2]: pyarrow.from_pylist([1,2,3])
+Out[2]:
+<pyarrow.array.Int64Array object at 0x7f899f3e60e8>
+[
+ 1,
+ 2,
+ 3
+]
+```
+
+[1]: https://cmake.org/
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
new file mode 100644
index 0000000..8d93a15
--- /dev/null
+++ b/python/pyarrow/__init__.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# flake8: noqa
+
+from pyarrow.array import (Array, from_pylist, total_allocated_bytes,
+ BooleanArray, NumericArray,
+ Int8Array, UInt8Array,
+ ListArray, StringArray)
+
+from pyarrow.error import ArrowException
+
+from pyarrow.scalar import (ArrayValue, Scalar, NA, NAType,
+ BooleanValue,
+ Int8Value, Int16Value, Int32Value, Int64Value,
+ UInt8Value, UInt16Value, UInt32Value, UInt64Value,
+ FloatValue, DoubleValue, ListValue, StringValue)
+
+from pyarrow.schema import (null, bool_,
+ int8, int16, int32, int64,
+ uint8, uint16, uint32, uint64,
+ float_, double, string,
+ list_, struct, field,
+ DataType, Field, Schema)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
new file mode 100644
index 0000000..d0d3486
--- /dev/null
+++ b/python/pyarrow/array.pxd
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport shared_ptr
+from pyarrow.includes.libarrow cimport CArray, LogicalType
+
+from pyarrow.scalar import NA
+
+from pyarrow.schema cimport DataType
+
+cdef extern from "Python.h":
+ int PySlice_Check(object)
+
+cdef class Array:
+ # Declaration of the C-level attributes and cdef methods of Array.
+ cdef:
+ # sp_array is the shared_ptr[CArray] held by the wrapper; ap is a raw
+ # CArray* kept alongside it.
+ shared_ptr[CArray] sp_array
+ CArray* ap
+
+ cdef readonly:
+ # Declared readonly, so it is readable (not assignable) from Python.
+ DataType type
+
+ cdef init(self, const shared_ptr[CArray]& sp_array)
+ cdef getitem(self, int i)
+
+
+cdef class BooleanArray(Array):
+ pass
+
+
+cdef class NumericArray(Array):
+ pass
+
+
+cdef class Int8Array(NumericArray):
+ pass
+
+
+cdef class UInt8Array(NumericArray):
+ pass
+
+
+cdef class Int16Array(NumericArray):
+ pass
+
+
+cdef class UInt16Array(NumericArray):
+ pass
+
+
+cdef class Int32Array(NumericArray):
+ pass
+
+
+cdef class UInt32Array(NumericArray):
+ pass
+
+
+cdef class Int64Array(NumericArray):
+ pass
+
+
+cdef class UInt64Array(NumericArray):
+ pass
+
+
+cdef class ListArray(Array):
+ pass
+
+
+cdef class StringArray(Array):
+ pass
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
new file mode 100644
index 0000000..bceb333
--- /dev/null
+++ b/python/pyarrow/array.pyx
@@ -0,0 +1,192 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from pyarrow.includes.libarrow cimport *
+cimport pyarrow.includes.pyarrow as pyarrow
+
+from pyarrow.compat import frombytes, tobytes
+from pyarrow.error cimport check_status
+
+cimport pyarrow.scalar as scalar
+from pyarrow.scalar import NA
+
+def total_allocated_bytes():
+ # Return bytes_allocated() of the MemoryPool obtained from
+ # pyarrow.GetMemoryPool().
+ cdef MemoryPool* pool = pyarrow.GetMemoryPool()
+ return pool.bytes_allocated()
+
+
+cdef class Array:
+    """
+    Base wrapper around a shared_ptr[CArray].
+
+    Instances are populated through the cdef ``init`` method rather than
+    through a Python-level constructor.
+    """
+
+    cdef init(self, const shared_ptr[CArray]& sp_array):
+        # Hold the shared_ptr for the lifetime of the wrapper and keep the
+        # raw pointer obtained from it next to it.
+        self.sp_array = sp_array
+        self.ap = sp_array.get()
+        self.type = DataType()
+        self.type.init(self.sp_array.get().type())
+
+    property null_count:
+
+        def __get__(self):
+            return self.sp_array.get().null_count()
+
+    def __iter__(self):
+        # A generator finishes by returning. An explicit
+        # ``raise StopIteration`` in a generator body is turned into a
+        # RuntimeError under PEP 479, so none is raised here.
+        for i in range(len(self)):
+            yield self.getitem(i)
+
+    def __repr__(self):
+        # Imported lazily; pyarrow.formatting is only needed for display.
+        from pyarrow.formatting import array_format
+        type_format = object.__repr__(self)
+        values = array_format(self, window=10)
+        return '{0}\n{1}'.format(type_format, values)
+
+    def __len__(self):
+        return self.sp_array.get().length()
+
+    def isnull(self):
+        # NotImplemented is a constant for binary-operator fallbacks and is
+        # not raisable; NotImplementedError is the exception for stubs.
+        raise NotImplementedError
+
+    def __getitem__(self, key):
+        """
+        Return the boxed value at integer ``key``, or ``self.slice(...)``
+        for a slice with step 1.
+
+        Raises NotImplementedError for a slice step other than 1 and
+        IndexError for an integer index outside ``[-len(self), len(self))``.
+        """
+        cdef:
+            Py_ssize_t n = len(self)
+
+        if PySlice_Check(key):
+            step = key.step or 1
+            if step != 1:
+                raise NotImplementedError
+
+            # Negative bounds are shifted once and clamped at 0. The
+            # previous ``while start < 0: start += n`` form never
+            # terminated when the array was empty (n == 0).
+            start = key.start or 0
+            if start < 0:
+                start = max(start + n, 0)
+
+            stop = key.stop if key.stop is not None else n
+            if stop < 0:
+                stop = max(stop + n, 0)
+
+            return self.slice(start, stop)
+
+        # Same single-shift normalisation for integer indices, followed by
+        # a bounds check so an out-of-range position never reaches getitem.
+        if key < 0:
+            key += n
+        if key < 0 or key >= n:
+            raise IndexError('array index out of range')
+
+        return self.getitem(key)
+
+    cdef getitem(self, int i):
+        return scalar.box_arrow_scalar(self.type, self.sp_array, i)
+
+    def slice(self, start, end):
+        # Placeholder: no slicing is performed yet, so this returns None.
+        pass
+
+
+cdef class NullArray(Array):
+ pass
+
+
+cdef class BooleanArray(Array):
+ pass
+
+
+cdef class NumericArray(Array):
+ pass
+
+
+cdef class Int8Array(NumericArray):
+ pass
+
+
+cdef class UInt8Array(NumericArray):
+ pass
+
+
+cdef class Int16Array(NumericArray):
+ pass
+
+
+cdef class UInt16Array(NumericArray):
+ pass
+
+
+cdef class Int32Array(NumericArray):
+ pass
+
+
+cdef class UInt32Array(NumericArray):
+ pass
+
+
+cdef class Int64Array(NumericArray):
+ pass
+
+
+cdef class UInt64Array(NumericArray):
+ pass
+
+
+cdef class FloatArray(NumericArray):
+ pass
+
+
+cdef class DoubleArray(NumericArray):
+ pass
+
+
+cdef class ListArray(Array):
+ pass
+
+
+cdef class StringArray(Array):
+ pass
+
+
+# Maps a LogicalType enum value to the Array subclass that box_arrow_array
+# instantiates for it. A type missing from this table cannot be boxed.
+cdef dict _array_classes = {
+ LogicalType_NA: NullArray,
+ LogicalType_BOOL: BooleanArray,
+ LogicalType_INT64: Int64Array,
+ LogicalType_DOUBLE: DoubleArray,
+ LogicalType_LIST: ListArray,
+ LogicalType_STRING: StringArray,
+}
+
+cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
+ # Wrap sp_array in the Array subclass registered in _array_classes for
+ # its data type, initialise the wrapper with init(), and return it.
+ #
+ # Raises ValueError when the array pointer or its data type is NULL.
+ # NOTE(review): a type id absent from _array_classes surfaces as a bare
+ # KeyError from the lookup below -- confirm whether that is intended.
+ if sp_array.get() == NULL:
+ raise ValueError('Array was NULL')
+
+ cdef CDataType* data_type = sp_array.get().type().get()
+
+ if data_type == NULL:
+ raise ValueError('Array data type was NULL')
+
+ cdef Array arr = _array_classes[data_type.type]()
+ arr.init(sp_array)
+ return arr
+
+
+def from_pylist(object list_obj, DataType type=None):
+ """
+ Convert Python list to Arrow array
+
+ The conversion is delegated to pyarrow.ConvertPySequence and the
+ resulting array is wrapped by box_arrow_array. The returned status is
+ passed to check_status. Passing an explicit ``type`` is not supported
+ yet and raises NotImplementedError.
+ """
+ cdef:
+ shared_ptr[CArray] sp_array
+
+ if type is None:
+ check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
+ else:
+ raise NotImplementedError
+
+ return box_arrow_array(sp_array)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/compat.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
new file mode 100644
index 0000000..08f0f23
--- /dev/null
+++ b/python/pyarrow/compat.py
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# flake8: noqa
+
+import itertools
+
+import numpy as np
+
+import sys
+import six
+from six import BytesIO, StringIO, string_types as py_string
+
+
+PY26 = sys.version_info[:2] == (2, 6)
+PY2 = sys.version_info[0] == 2
+
+
+if PY26:
+ import unittest2 as unittest
+else:
+ import unittest
+
+
+# Version shims: each branch binds the same set of names (unicode_type, lzip,
+# zip, dict_values, range, long, Decimal, u, tobytes, frombytes) so callers
+# can import them from this module regardless of the interpreter version.
+if PY2:
+ import cPickle
+
+ # Use cdecimal when it is installed, otherwise the stdlib decimal module.
+ try:
+ from cdecimal import Decimal
+ except ImportError:
+ from decimal import Decimal
+
+ unicode_type = unicode
+ # lzip keeps the list-returning builtin; zip is rebound to the iterator.
+ lzip = zip
+ zip = itertools.izip
+
+ def dict_values(x):
+ return x.values()
+
+ range = xrange
+ long = long
+
+ def u(s):
+ return unicode(s, "unicode_escape")
+
+ def tobytes(o):
+ # unicode is encoded as UTF-8; anything else is returned unchanged.
+ if isinstance(o, unicode):
+ return o.encode('utf8')
+ else:
+ return o
+
+ def frombytes(o):
+ return o
+else:
+ unicode_type = str
+ def lzip(*x):
+ return list(zip(*x))
+ long = int
+ zip = zip
+ def dict_values(x):
+ return list(x.values())
+ from decimal import Decimal
+ range = range
+
+ def u(s):
+ return s
+
+ def tobytes(o):
+ # str is encoded as UTF-8; anything else is returned unchanged.
+ if isinstance(o, str):
+ return o.encode('utf8')
+ else:
+ return o
+
+ def frombytes(o):
+ # Decodes o as UTF-8 by calling o.decode.
+ return o.decode('utf8')
+
+
+integer_types = six.integer_types + (np.integer,)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/config.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/config.pyx b/python/pyarrow/config.pyx
new file mode 100644
index 0000000..521bc06
--- /dev/null
+++ b/python/pyarrow/config.pyx
@@ -0,0 +1,8 @@
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+cdef extern from 'pyarrow/init.h' namespace 'pyarrow':
+ void pyarrow_init()
+
+pyarrow_init()
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/error.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pxd b/python/pyarrow/error.pxd
new file mode 100644
index 0000000..d226abe
--- /dev/null
+++ b/python/pyarrow/error.pxd
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.pyarrow cimport *
+
+cdef check_status(const Status& status)
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx
new file mode 100644
index 0000000..3f8d7dd
--- /dev/null
+++ b/python/pyarrow/error.pyx
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport c_string
+from pyarrow.compat import frombytes
+
+class ArrowException(Exception):
+ # Raised by check_status when a Status is not ok().
+ pass
+
+cdef check_status(const Status& status):
+ # Return (None) when status.ok(); otherwise raise ArrowException whose
+ # message is status.ToString() converted with frombytes.
+ if status.ok():
+ return
+
+ cdef c_string c_message = status.ToString()
+ raise ArrowException(frombytes(c_message))
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/formatting.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/formatting.py b/python/pyarrow/formatting.py
new file mode 100644
index 0000000..5fe0611
--- /dev/null
+++ b/python/pyarrow/formatting.py
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Pretty-printing and other formatting utilities for Arrow data structures
+
+import pyarrow.scalar as scalar
+
+
+def array_format(arr, window=None):
+ # Format arr as a bracketed listing, one value per line, indented by two
+ # spaces. When window is None or 2 * window covers the whole array, every
+ # value is shown; otherwise only the first and last `window` values are
+ # shown with a '...' line between them.
+ values = []
+
+ if window is None or window * 2 >= len(arr):
+ for x in arr:
+ values.append(value_format(x, 0))
+ contents = _indent(',\n'.join(values), 2)
+ else:
+ # Commas are appended per value here (rather than by the join) so the
+ # '...' marker line and the final value carry no trailing comma.
+ for i in range(window):
+ values.append(value_format(arr[i], 0) + ',')
+ values.append('...')
+ for i in range(len(arr) - window, len(arr)):
+ formatted = value_format(arr[i], 0)
+ if i < len(arr) - 1:
+ formatted += ','
+ values.append(formatted)
+ contents = _indent('\n'.join(values), 2)
+
+ return '[\n{0}\n]'.format(contents)
+
+
+def value_format(x, indent_level=0):
+ # Format one value: a scalar.ListValue becomes '[...]' with its items
+ # formatted recursively and joined by ',\n'; anything else uses repr().
+ # NOTE(review): indent_level is accepted but not used in this body.
+ if isinstance(x, scalar.ListValue):
+ contents = ',\n'.join(value_format(item) for item in x)
+ return '[{0}]'.format(_indent(contents, 1).strip())
+ else:
+ return repr(x)
+
+
+def _indent(text, spaces):
+ # Prefix every '\n'-separated line of text with `spaces` space characters;
+ # text is returned unchanged when spaces == 0.
+ if spaces == 0:
+ return text
+ block = ' ' * spaces
+ return '\n'.join(block + x for x in text.split('\n'))
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/__init__.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/__init__.pxd b/python/pyarrow/includes/__init__.pxd
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
new file mode 100644
index 0000000..839427a
--- /dev/null
+++ b/python/pyarrow/includes/common.pxd
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from libc.stdint cimport *
+from libcpp cimport bool as c_bool
+from libcpp.string cimport string as c_string
+from libcpp.vector cimport vector
+
+# This must be included for cerr and other things to work
+cdef extern from "<iostream>":
+ pass
+
+cdef extern from "<memory>" namespace "std" nogil:
+
+ cdef cppclass shared_ptr[T]:
+ shared_ptr()
+ shared_ptr(T*)
+ T* get()
+ void reset()
+ void reset(T* p)