You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/14 14:58:56 UTC
arrow git commit: ARROW-598: [Python] Add support for converting
pyarrow.Buffer to a memoryview with zero copy
Repository: arrow
Updated Branches:
refs/heads/master f442879d3 -> cef46152c
ARROW-598: [Python] Add support for converting pyarrow.Buffer to a memoryview with zero copy
WIP, as tests are not all done and I'm assuming we'll need to keep a reference to the underlying buffer so it doesn't get gc'ed.
Author: Jeff Knupp <je...@jeffknupp.com>
Author: Jeff Knupp <je...@enigma.io>
Closes #369 from jeffknupp/master and squashes the following commits:
c300f30 [Jeff Knupp] Initialize members in init; test for lifetime with zero references
13f5dc1 [Jeff Knupp] WIP: python 2 compatibility
170d01d [Jeff Knupp] WIP: python 2 compatibility
bfbed0f [Jeff Knupp] WIP: add test for buffer protocol reference counting
fd1cb44 [Jeff Knupp] WIP: make buffers read-only; add test for immutability
c24e83a [Jeff Knupp] WIP: make arrow.io.Buffer implement Python's buffer protocol
b2540d4 [Jeff Knupp] ARROW-598: [Python] Add support for converting pyarrow.Buffer to a memoryview with zero copy
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/cef46152
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/cef46152
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/cef46152
Branch: refs/heads/master
Commit: cef46152cc7489c23b67aaed70574dba742d19bb
Parents: f442879
Author: Jeff Knupp <je...@jeffknupp.com>
Authored: Tue Mar 14 10:58:44 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Mar 14 10:58:44 2017 -0400
----------------------------------------------------------------------
python/pyarrow/io.pxd | 2 ++
python/pyarrow/io.pyx | 16 ++++++++++++++-
python/pyarrow/tests/test_io.py | 39 ++++++++++++++++++++++++++++++++++++
3 files changed, 56 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/cef46152/python/pyarrow/io.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pxd b/python/pyarrow/io.pxd
index fffc7c5..3d73e11 100644
--- a/python/pyarrow/io.pxd
+++ b/python/pyarrow/io.pxd
@@ -25,6 +25,8 @@ from pyarrow.includes.libarrow_io cimport (ReadableFileInterface,
cdef class Buffer:
cdef:
shared_ptr[CBuffer] buffer
+ Py_ssize_t shape[1]
+ Py_ssize_t strides[1]
cdef init(self, const shared_ptr[CBuffer]& buffer)
http://git-wip-us.apache.org/repos/asf/arrow/blob/cef46152/python/pyarrow/io.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index 4acef21..240ea24 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -56,7 +56,6 @@ cdef extern from "Python.h":
PyObject* PyBytes_FromStringAndSizeNative" PyBytes_FromStringAndSize"(
char *v, Py_ssize_t len) except NULL
-
cdef class NativeFile:
def __cinit__(self):
@@ -421,6 +420,8 @@ cdef class Buffer:
cdef init(self, const shared_ptr[CBuffer]& buffer):
self.buffer = buffer
+ self.shape[0] = self.size
+ self.strides[0] = <Py_ssize_t>(1)
def __len__(self):
return self.size
@@ -449,6 +450,19 @@ cdef class Buffer:
<const char*>self.buffer.get().data(),
self.buffer.get().size())
+ def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
+
+ buffer.buf = <char *>self.buffer.get().data()
+ buffer.format = 'b'
+ buffer.internal = NULL
+ buffer.itemsize = 1
+ buffer.len = self.size
+ buffer.ndim = 1
+ buffer.obj = self
+ buffer.readonly = 1
+ buffer.shape = self.shape
+ buffer.strides = self.strides
+ buffer.suboffsets = NULL
cdef shared_ptr[PoolBuffer] allocate_buffer(CMemoryPool* pool):
cdef shared_ptr[PoolBuffer] result
http://git-wip-us.apache.org/repos/asf/arrow/blob/cef46152/python/pyarrow/tests/test_io.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index dfa84a2..c6caba5 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -135,6 +135,34 @@ def test_buffer_bytes():
assert result == val
+def test_buffer_memoryview():
+ val = b'some data'
+
+ buf = io.buffer_from_bytes(val)
+ assert isinstance(buf, io.Buffer)
+
+ result = memoryview(buf)
+
+ assert result == val
+
+
+def test_buffer_memoryview_is_immutable():
+ val = b'some data'
+
+ buf = io.buffer_from_bytes(val)
+ assert isinstance(buf, io.Buffer)
+
+ result = memoryview(buf)
+
+ with pytest.raises(TypeError) as exc:
+ result[0] = b'h'
+ assert 'cannot modify read-only' in str(exc.value)
+
+ b = bytes(buf)
+ with pytest.raises(TypeError) as exc:
+ b[0] = b'h'
+ assert 'cannot modify read-only' in str(exc.value)
+
def test_memory_output_stream():
# 10 bytes
@@ -160,6 +188,17 @@ def test_inmemory_write_after_closed():
with pytest.raises(IOError):
f.write(b'not ok')
+def test_buffer_protocol_ref_counting():
+ import gc
+
+ def make_buffer(bytes_obj):
+ return bytearray(io.buffer_from_bytes(bytes_obj))
+
+ buf = make_buffer(b'foo')
+ gc.collect()
+ assert buf == b'foo'
+
+
# ----------------------------------------------------------------------
# OS files and memory maps