You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/04/18 14:37:08 UTC
arrow git commit: ARROW-818: [Python] Expand Sphinx API docs, pyarrow.* namespace. Add factory functions for time32, time64
Repository: arrow
Updated Branches:
refs/heads/master bb287e203 -> 7f20f6e73
ARROW-818: [Python] Expand Sphinx API docs, pyarrow.* namespace. Add factory functions for time32, time64
Author: Wes McKinney <we...@twosigma.com>
Closes #557 from wesm/ARROW-818 and squashes the following commits:
96ce436 [Wes McKinney] Expand Sphinx API docs, pyarrow.* namespace. Add factory functions for time32, time64
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/7f20f6e7
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/7f20f6e7
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/7f20f6e7
Branch: refs/heads/master
Commit: 7f20f6e738a2e163b0b753416ee4c4ed00998f4b
Parents: bb287e2
Author: Wes McKinney <we...@twosigma.com>
Authored: Tue Apr 18 16:37:03 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Tue Apr 18 16:37:03 2017 +0200
----------------------------------------------------------------------
python/doc/source/api.rst | 69 +++++++++++++++++++++++-----
python/pyarrow/__init__.py | 33 ++++++++++----
python/pyarrow/_array.pxd | 10 +++++
python/pyarrow/_array.pyx | 74 ++++++++++++++++++++++++++++++-
python/pyarrow/_io.pyx | 6 +--
python/pyarrow/includes/libarrow.pxd | 3 ++
python/pyarrow/tests/test_io.py | 4 +-
python/pyarrow/tests/test_schema.py | 21 +++++++++
8 files changed, 195 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/doc/source/api.rst
----------------------------------------------------------------------
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index 92e248b..08a0694 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -24,8 +24,8 @@ API Reference
.. _api.functions:
-Type Metadata and Schemas
--------------------------
+Type and Schema Factory Functions
+---------------------------------
.. autosummary::
:toctree: generated/
@@ -43,6 +43,8 @@ Type Metadata and Schemas
float16
float32
float64
+ time32
+ time64
timestamp
date32
date64
@@ -53,10 +55,8 @@ Type Metadata and Schemas
struct
dictionary
field
- DataType
- Field
- Schema
schema
+ from_numpy_dtype
Scalar Value Types
------------------
@@ -68,6 +68,7 @@ Scalar Value Types
NAType
Scalar
ArrayValue
+ BooleanValue
Int8Value
Int16Value
Int32Value
@@ -82,6 +83,11 @@ Scalar Value Types
BinaryValue
StringValue
FixedSizeBinaryValue
+ Date32Value
+ Date64Value
+ TimestampValue
+ DecimalValue
+
Array Types and Constructors
----------------------------
@@ -91,21 +97,30 @@ Array Types and Constructors
array
Array
- NullArray
- NumericArray
- IntegerArray
- FloatingPointArray
BooleanArray
+ DictionaryArray
+ FloatingPointArray
+ IntegerArray
Int8Array
Int16Array
Int32Array
Int64Array
+ NullArray
+ NumericArray
UInt8Array
UInt16Array
UInt32Array
UInt64Array
- DictionaryArray
+ BinaryArray
+ FixedSizeBinaryArray
StringArray
+ Time32Array
+ Time64Array
+ Date32Array
+ Date64Array
+ TimestampArray
+ DecimalArray
+ ListArray
Tables and Record Batches
-------------------------
@@ -113,9 +128,11 @@ Tables and Record Batches
.. autosummary::
:toctree: generated/
+ ChunkedArray
Column
RecordBatch
Table
+ get_record_batch_size
Tensor type and Functions
-------------------------
@@ -141,7 +158,7 @@ Input / Output and Shared Memory
MemoryMappedFile
memory_map
create_memory_map
- PythonFileInterface
+ PythonFile
Interprocess Communication and Messaging
----------------------------------------
@@ -165,3 +182,33 @@ Memory Pools
jemalloc_memory_pool
total_allocated_bytes
set_memory_pool
+
+Type Classes
+------------
+
+.. autosummary::
+ :toctree: generated/
+
+ DataType
+ DecimalType
+ DictionaryType
+ FixedSizeBinaryType
+ Time32Type
+ Time64Type
+ TimestampType
+ Field
+ Schema
+
+.. currentmodule:: pyarrow.parquet
+
+Apache Parquet
+--------------
+
+.. autosummary::
+ :toctree: generated/
+
+ ParquetDataset
+ ParquetFile
+ read_table
+ write_metadata
+ write_table
http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 87f2352..4d8da9f 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -31,12 +31,20 @@ from pyarrow._config import cpu_count, set_cpu_count
from pyarrow._array import (null, bool_,
int8, int16, int32, int64,
uint8, uint16, uint32, uint64,
- timestamp, date32, date64,
+ time32, time64, timestamp, date32, date64,
float16, float32, float64,
binary, string, decimal,
list_, struct, dictionary, field,
- DataType, FixedSizeBinaryType,
- Field, Schema, schema,
+ DataType,
+ DecimalType,
+ DictionaryType,
+ FixedSizeBinaryType,
+ TimestampType,
+ Time32Type,
+ Time64Type,
+ Field,
+ Schema,
+ schema,
Array, Tensor,
array,
from_numpy_dtype,
@@ -47,25 +55,34 @@ from pyarrow._array import (null, bool_,
Int16Array, UInt16Array,
Int32Array, UInt32Array,
Int64Array, UInt64Array,
- ListArray, StringArray,
+ ListArray,
+ BinaryArray, StringArray,
+ FixedSizeBinaryArray,
DictionaryArray,
+ Date32Array, Date64Array,
+ TimestampArray, Time32Array, Time64Array,
+ DecimalArray,
ArrayValue, Scalar, NA, NAType,
BooleanValue,
Int8Value, Int16Value, Int32Value, Int64Value,
UInt8Value, UInt16Value, UInt32Value, UInt64Value,
FloatValue, DoubleValue, ListValue,
- BinaryValue, StringValue, FixedSizeBinaryValue)
+ BinaryValue, StringValue, FixedSizeBinaryValue,
+ DecimalValue,
+ Date32Value, Date64Value, TimestampValue)
-from pyarrow._io import (HdfsFile, NativeFile, PythonFileInterface,
+from pyarrow._io import (HdfsFile, NativeFile, PythonFile,
Buffer, BufferReader, InMemoryOutputStream,
OSFile, MemoryMappedFile, memory_map,
frombuffer, read_tensor, write_tensor,
memory_map, create_memory_map,
- get_record_batch_size, get_tensor_size)
+ get_record_batch_size, get_tensor_size,
+ have_libhdfs, have_libhdfs3)
from pyarrow._memory import (MemoryPool, total_allocated_bytes,
set_memory_pool, default_memory_pool)
-from pyarrow._table import Column, RecordBatch, Table, concat_tables
+from pyarrow._table import (ChunkedArray, Column, RecordBatch, Table,
+ concat_tables)
from pyarrow._error import (ArrowException,
ArrowKeyError,
ArrowInvalid,
http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/pyarrow/_array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/_array.pxd b/python/pyarrow/_array.pxd
index afb0c27..464de31 100644
--- a/python/pyarrow/_array.pxd
+++ b/python/pyarrow/_array.pxd
@@ -42,6 +42,16 @@ cdef class TimestampType(DataType):
const CTimestampType* ts_type
+cdef class Time32Type(DataType):
+ cdef:
+ const CTime32Type* time_type
+
+
+cdef class Time64Type(DataType):
+ cdef:
+ const CTime64Type* time_type
+
+
cdef class FixedSizeBinaryType(DataType):
cdef:
const CFixedSizeBinaryType* fixed_size_binary_type
http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/pyarrow/_array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_array.pyx b/python/pyarrow/_array.pyx
index e41380d..1c571ba 100644
--- a/python/pyarrow/_array.pyx
+++ b/python/pyarrow/_array.pyx
@@ -127,6 +127,30 @@ cdef class TimestampType(DataType):
return None
+cdef class Time32Type(DataType):
+
+ cdef void init(self, const shared_ptr[CDataType]& type):
+ DataType.init(self, type)
+ self.time_type = <const CTime32Type*> type.get()
+
+ property unit:
+
+ def __get__(self):
+ return timeunit_to_string(self.time_type.unit())
+
+
+cdef class Time64Type(DataType):
+
+ cdef void init(self, const shared_ptr[CDataType]& type):
+ DataType.init(self, type)
+ self.time_type = <const CTime64Type*> type.get()
+
+ property unit:
+
+ def __get__(self):
+ return timeunit_to_string(self.time_type.unit())
+
+
cdef class FixedSizeBinaryType(DataType):
cdef void init(self, const shared_ptr[CDataType]& type):
@@ -342,6 +366,7 @@ def int64():
cdef dict _timestamp_type_cache = {}
+cdef dict _time_type_cache = {}
cdef timeunit_to_string(TimeUnit unit):
@@ -369,7 +394,7 @@ def timestamp(unit_str, tz=None):
elif unit_str == 'ns':
unit = TimeUnit_NANO
else:
- raise TypeError('Invalid TimeUnit string')
+ raise ValueError('Invalid TimeUnit string')
cdef TimestampType out = TimestampType()
@@ -388,6 +413,50 @@ def timestamp(unit_str, tz=None):
return out
+def time32(unit_str):
+ cdef:
+ TimeUnit unit
+ c_string c_timezone
+
+ if unit_str == "s":
+ unit = TimeUnit_SECOND
+ elif unit_str == 'ms':
+ unit = TimeUnit_MILLI
+ else:
+ raise ValueError('Invalid TimeUnit for time32: {}'.format(unit_str))
+
+ cdef Time32Type out
+ if unit in _time_type_cache:
+ return _time_type_cache[unit]
+ else:
+ out = Time32Type()
+ out.init(ctime32(unit))
+ _time_type_cache[unit] = out
+ return out
+
+
+def time64(unit_str):
+ cdef:
+ TimeUnit unit
+ c_string c_timezone
+
+ if unit_str == "us":
+ unit = TimeUnit_MICRO
+ elif unit_str == 'ns':
+ unit = TimeUnit_NANO
+ else:
+ raise ValueError('Invalid TimeUnit for time64: {}'.format(unit_str))
+
+ cdef Time64Type out
+ if unit in _time_type_cache:
+ return _time_type_cache[unit]
+ else:
+ out = Time64Type()
+ out.init(ctime64(unit))
+ _time_type_cache[unit] = out
+ return out
+
+
def date32():
return primitive_type(_Type_DATE32)
@@ -516,6 +585,9 @@ cdef Schema box_schema(const shared_ptr[CSchema]& type):
def from_numpy_dtype(object dtype):
+ """
+ Convert NumPy dtype to pyarrow.DataType
+ """
cdef shared_ptr[CDataType] c_type
with nogil:
check_status(pyarrow.NumPyDtypeToArrow(dtype, &c_type))
http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/pyarrow/_io.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_io.pyx b/python/pyarrow/_io.pyx
index 09e8233..40c76f8 100644
--- a/python/pyarrow/_io.pyx
+++ b/python/pyarrow/_io.pyx
@@ -307,7 +307,7 @@ cdef class NativeFile:
# Python file-like objects
-cdef class PythonFileInterface(NativeFile):
+cdef class PythonFile(NativeFile):
cdef:
object handle
@@ -600,7 +600,7 @@ cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
source = BufferReader(source)
elif not isinstance(source, NativeFile) and hasattr(source, 'read'):
# Optimistically hope this is file-like
- source = PythonFileInterface(source, mode='r')
+ source = PythonFile(source, mode='r')
if isinstance(source, NativeFile):
nf = source
@@ -622,7 +622,7 @@ cdef get_writer(object source, shared_ptr[OutputStream]* writer):
source = OSFile(source, mode='w')
elif not isinstance(source, NativeFile) and hasattr(source, 'write'):
# Optimistically hope this is file-like
- source = PythonFileInterface(source, mode='w')
+ source = PythonFile(source, mode='w')
if isinstance(source, NativeFile):
nf = source
http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index ea835f6..473a0b9 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -106,6 +106,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CTime64Type" arrow::Time64Type"(CFixedWidthType):
TimeUnit unit()
+ shared_ptr[CDataType] ctime32" arrow::time32"(TimeUnit unit)
+ shared_ptr[CDataType] ctime64" arrow::time64"(TimeUnit unit)
+
cdef cppclass CDictionaryType" arrow::DictionaryType"(CFixedWidthType):
CDictionaryType(const shared_ptr[CDataType]& index_type,
const shared_ptr[CArray]& dictionary)
http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/pyarrow/tests/test_io.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index c5d3708..a14898f 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -32,7 +32,7 @@ import pyarrow as pa
def test_python_file_write():
buf = BytesIO()
- f = pa.PythonFileInterface(buf)
+ f = pa.PythonFile(buf)
assert f.tell() == 0
@@ -56,7 +56,7 @@ def test_python_file_read():
data = b'some sample data'
buf = BytesIO(data)
- f = pa.PythonFileInterface(buf, mode='r')
+ f = pa.PythonFile(buf, mode='r')
assert f.size() == len(data)
http://git-wip-us.apache.org/repos/asf/arrow/blob/7f20f6e7/python/pyarrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index d1107fb..da704f3 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -77,6 +77,27 @@ def test_type_timestamp_with_tz():
assert t.tz == tz
+def test_time_types():
+ t1 = pa.time32('s')
+ t2 = pa.time32('ms')
+ t3 = pa.time64('us')
+ t4 = pa.time64('ns')
+
+ assert t1.unit == 's'
+ assert t2.unit == 'ms'
+ assert t3.unit == 'us'
+ assert t4.unit == 'ns'
+
+ assert str(t1) == 'time32[s]'
+ assert str(t4) == 'time64[ns]'
+
+ with pytest.raises(ValueError):
+ pa.time32('us')
+
+ with pytest.raises(ValueError):
+ pa.time64('s')
+
+
def test_type_from_numpy_dtype_timestamps():
cases = [
(np.dtype('datetime64[s]'), pa.timestamp('s')),