You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/04/04 19:54:53 UTC
[arrow] branch master updated: ARROW-2276: [Python] Expose buffer
protocol on Tensor
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 640fc83 ARROW-2276: [Python] Expose buffer protocol on Tensor
640fc83 is described below
commit 640fc83fd8e6ebdfd1b4dca8b8ca36bca00f77f4
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed Apr 4 21:54:26 2018 +0200
ARROW-2276: [Python] Expose buffer protocol on Tensor
Also add a bit_width property to the DataType class.
Author: Antoine Pitrou <an...@python.org>
Closes #1741 from pitrou/ARROW-2276-tensor-buffer-protocol and squashes the following commits:
104388a <Antoine Pitrou> ARROW-2276: Expose buffer protocol on Tensor
---
python/pyarrow/array.pxi | 24 +++++++++++++++++++
python/pyarrow/lib.pxd | 3 +++
python/pyarrow/tests/test_tensor.py | 27 ++++++++++++++++++++++
python/pyarrow/tests/test_types.py | 13 +++++++++++
python/pyarrow/types.pxi | 46 +++++++++++++++++++++++++++++++++++++
5 files changed, 113 insertions(+)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index a67bd8b..490a37b 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -651,6 +651,30 @@ strides: {0.strides}""".format(self)
self._validate()
return tuple(self.tp.strides())
+ def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
+ self._validate()
+
+ buffer.buf = <char *> self.tp.data().get().data()
+ pep3118_format = self.type.pep3118_format
+ if pep3118_format is None:
+ raise NotImplementedError("type %s not supported for buffer "
+ "protocol" % (self.type,))
+ buffer.format = pep3118_format
+ buffer.itemsize = self.type.bit_width // 8
+ buffer.internal = NULL
+ buffer.len = self.tp.size() * buffer.itemsize
+ buffer.ndim = self.tp.ndim()
+ buffer.obj = self
+ if self.tp.is_mutable():
+ buffer.readonly = 0
+ else:
+ buffer.readonly = 1
+ # NOTE: This assumes Py_ssize_t == int64_t, and that the shape
+ # and strides arrays lifetime is tied to the tensor's
+ buffer.shape = <Py_ssize_t *> &self.tp.shape()[0]
+ buffer.strides = <Py_ssize_t *> &self.tp.strides()[0]
+ buffer.suboffsets = NULL
+
cdef wrap_array_output(PyObject* output):
cdef object obj = PyObject_to_object(output)
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 4c24b48..6f4100f 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -20,6 +20,8 @@ from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow cimport CStatus
from cpython cimport PyObject
from libcpp cimport nullptr
+from libcpp.cast cimport dynamic_cast
+
cdef extern from "Python.h":
int PySlice_Check(object)
@@ -42,6 +44,7 @@ cdef class DataType:
cdef:
shared_ptr[CDataType] sp_type
CDataType* type
+ bytes pep3118_format
cdef void init(self, const shared_ptr[CDataType]& type)
diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py
index 093bc86..188a4a5 100644
--- a/python/pyarrow/tests/test_tensor.py
+++ b/python/pyarrow/tests/test_tensor.py
@@ -165,3 +165,30 @@ def test_read_tensor(tmpdir):
read_mmap = pa.memory_map(path, mode='r')
array = pa.read_tensor(read_mmap).to_numpy()
np.testing.assert_equal(data, array)
+
+
+@pytest.mark.skipif(sys.version_info < (3,),
+ reason="requires Python 3+")
+def test_tensor_memoryview():
+ # Tensors support the PEP 3118 buffer protocol
+ for dtype, expected_format in [(np.int8, '=b'),
+ (np.int64, '=q'),
+ (np.uint64, '=Q'),
+ (np.float16, 'e'),
+ (np.float64, 'd'),
+ ]:
+ data = np.arange(10, dtype=dtype)
+ dtype = data.dtype
+ lst = data.tolist()
+ tensor = pa.Tensor.from_numpy(data)
+ m = memoryview(tensor)
+ assert m.format == expected_format
+ assert m.shape == data.shape
+ assert m.strides == data.strides
+ assert m.ndim == 1
+ assert m.nbytes == data.nbytes
+ assert m.itemsize == data.itemsize
+ assert m.itemsize * 8 == tensor.type.bit_width
+ assert np.frombuffer(m, dtype).tolist() == lst
+ del tensor, data
+ assert np.frombuffer(m, dtype).tolist() == lst
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index b517020..5057359 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -230,6 +230,19 @@ def test_exact_primitive_types(t, check_func):
assert check_func(t)
+def test_bit_width():
+ for ty, expected in [(pa.bool_(), 1),
+ (pa.int8(), 8),
+ (pa.uint32(), 32),
+ (pa.float16(), 16),
+ (pa.decimal128(19, 4), 128),
+ (pa.binary(42), 42 * 8)]:
+ assert ty.bit_width == expected
+ for ty in [pa.binary(), pa.string(), pa.list_(pa.int16())]:
+ with pytest.raises(ValueError, match="fixed width"):
+ ty.bit_width
+
+
def test_fixed_size_binary_byte_width():
ty = pa.binary(5)
assert ty.byte_width == 5
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 2abdb30..cb3a72d 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -43,6 +43,42 @@ cdef dict _pandas_type_map = {
_Type_DECIMAL: np.object_,
}
+cdef dict _pep3118_type_map = {
+ _Type_INT8: b'b',
+ _Type_INT16: b'h',
+ _Type_INT32: b'i',
+ _Type_INT64: b'q',
+ _Type_UINT8: b'B',
+ _Type_UINT16: b'H',
+ _Type_UINT32: b'I',
+ _Type_UINT64: b'Q',
+ _Type_HALF_FLOAT: b'e',
+ _Type_FLOAT: b'f',
+ _Type_DOUBLE: b'd',
+}
+
+
+cdef bytes _datatype_to_pep3118(CDataType* type):
+ """
+ Construct a PEP 3118 format string describing the given datatype.
+ None is returned for unsupported types.
+ """
+ try:
+ char = _pep3118_type_map[type.id()]
+ except KeyError:
+ return None
+ else:
+ if char in b'bBhHiIqQ':
+ # Use "standard" int widths, not native
+ return b'=' + char
+ else:
+ return char
+
+
+# Workaround for Cython parsing bug
+# https://github.com/cython/cython/issues/2143
+ctypedef CFixedWidthType* _CFixedWidthTypePtr
+
cdef class DataType:
"""
@@ -54,12 +90,22 @@ cdef class DataType:
cdef void init(self, const shared_ptr[CDataType]& type):
self.sp_type = type
self.type = type.get()
+ self.pep3118_format = _datatype_to_pep3118(self.type)
property id:
def __get__(self):
return self.type.id()
+ property bit_width:
+
+ def __get__(self):
+ cdef _CFixedWidthTypePtr ty
+ ty = dynamic_cast[_CFixedWidthTypePtr](self.type)
+ if ty == nullptr:
+ raise ValueError("Non-fixed width type")
+ return ty.bit_width()
+
def __str__(self):
if self.type is NULL:
raise TypeError(
--
To stop receiving notification emails like this one, please contact
apitrou@apache.org.