You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/14 14:58:56 UTC

arrow git commit: ARROW-598: [Python] Add support for converting pyarrow.Buffer to a memoryview with zero copy

Repository: arrow
Updated Branches:
  refs/heads/master f442879d3 -> cef46152c


ARROW-598: [Python]  Add support for converting pyarrow.Buffer to a memoryview with zero copy

WIP, as tests are not all done and I'm assuming we'll need to keep a reference to the underlying buffer so it doesn't get gc'ed.

Author: Jeff Knupp <je...@jeffknupp.com>
Author: Jeff Knupp <je...@enigma.io>

Closes #369 from jeffknupp/master and squashes the following commits:

c300f30 [Jeff Knupp] Initialize members in init; test for lifetime with zero references
13f5dc1 [Jeff Knupp] WIP: python 2 compatibility
170d01d [Jeff Knupp] WIP: python 2 compatibility
bfbed0f [Jeff Knupp] WIP: add test for buffer protocol reference counting
fd1cb44 [Jeff Knupp] WIP: make buffers read-only; add test for immutability
c24e83a [Jeff Knupp] WIP: make arrow.io.Buffer implement Python's buffer protocol
b2540d4 [Jeff Knupp] ARROW-598: [Python]  Add support for converting pyarrow.Buffer to a memoryview with zero copy


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/cef46152
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/cef46152
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/cef46152

Branch: refs/heads/master
Commit: cef46152cc7489c23b67aaed70574dba742d19bb
Parents: f442879
Author: Jeff Knupp <je...@jeffknupp.com>
Authored: Tue Mar 14 10:58:44 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Mar 14 10:58:44 2017 -0400

----------------------------------------------------------------------
 python/pyarrow/io.pxd           |  2 ++
 python/pyarrow/io.pyx           | 16 ++++++++++++++-
 python/pyarrow/tests/test_io.py | 39 ++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/cef46152/python/pyarrow/io.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pxd b/python/pyarrow/io.pxd
index fffc7c5..3d73e11 100644
--- a/python/pyarrow/io.pxd
+++ b/python/pyarrow/io.pxd
@@ -25,6 +25,8 @@ from pyarrow.includes.libarrow_io cimport (ReadableFileInterface,
 cdef class Buffer:
     cdef:
         shared_ptr[CBuffer] buffer
+        Py_ssize_t shape[1]
+        Py_ssize_t strides[1]
 
     cdef init(self, const shared_ptr[CBuffer]& buffer)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/cef46152/python/pyarrow/io.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index 4acef21..240ea24 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -56,7 +56,6 @@ cdef extern from "Python.h":
     PyObject* PyBytes_FromStringAndSizeNative" PyBytes_FromStringAndSize"(
         char *v, Py_ssize_t len) except NULL
 
-
 cdef class NativeFile:
 
     def __cinit__(self):
@@ -421,6 +420,8 @@ cdef class Buffer:
 
     cdef init(self, const shared_ptr[CBuffer]& buffer):
         self.buffer = buffer
+        self.shape[0] = self.size  # 1-D extent later exposed by __getbuffer__
+        self.strides[0] = <Py_ssize_t>(1)  # neighbouring items are 1 byte apart
 
     def __len__(self):
         return self.size
@@ -449,6 +450,19 @@ cdef class Buffer:
             <const char*>self.buffer.get().data(),
             self.buffer.get().size())
 
+    def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
+        # Buffer-protocol export: fill `buffer` with a one-dimensional,
+        # read-only view of single-byte items that points straight at the
+        # wrapped buffer's memory (no copy).
+
+        # Only read-only views are exported, so a request for a writable
+        # one has to fail instead of silently returning read-only memory.
+        if flags & cp.PyBUF_WRITABLE:
+            raise BufferError("Writable buffer requested but this "
+                              "Buffer only exports read-only memory")
+
+        buffer.buf = <char *>self.buffer.get().data()
+        buffer.format = 'b'
+        buffer.internal = NULL
+        buffer.itemsize = 1
+        buffer.len = self.size
+        buffer.ndim = 1
+        # The view records this object as its exporter.
+        buffer.obj = self
+        buffer.readonly = 1
+        # shape/strides are arrays stored on the instance (filled in init).
+        buffer.shape = self.shape
+        buffer.strides = self.strides
+        buffer.suboffsets = NULL
 
 cdef shared_ptr[PoolBuffer] allocate_buffer(CMemoryPool* pool):
     cdef shared_ptr[PoolBuffer] result

http://git-wip-us.apache.org/repos/asf/arrow/blob/cef46152/python/pyarrow/tests/test_io.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index dfa84a2..c6caba5 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -135,6 +135,34 @@ def test_buffer_bytes():
 
     assert result == val
 
+def test_buffer_memoryview():
+    # A Buffer viewed through memoryview must compare equal to its source.
+    payload = b'some data'
+
+    arrow_buf = io.buffer_from_bytes(payload)
+    assert isinstance(arrow_buf, io.Buffer)
+
+    view = memoryview(arrow_buf)
+    assert view == payload
+
+
+def test_buffer_memoryview_is_immutable():
+    # Neither the memoryview nor a bytes copy of a Buffer may be written to.
+    val = b'some data'
+    buf = io.buffer_from_bytes(val)
+    assert isinstance(buf, io.Buffer)
+
+    result = memoryview(buf)
+    with pytest.raises(TypeError) as exc:
+        result[0] = b'h'
+    # Inspect exc only after the block; code after the raise is skipped.
+    assert 'cannot modify read-only' in str(exc.value)
+
+    b = bytes(buf)
+    with pytest.raises(TypeError) as exc:
+        b[0] = b'h'
+    assert 'does not support item assignment' in str(exc.value)
+
 
 def test_memory_output_stream():
     # 10 bytes
@@ -160,6 +188,17 @@ def test_inmemory_write_after_closed():
     with pytest.raises(IOError):
         f.write(b'not ok')
 
+def test_buffer_protocol_ref_counting():
+    # The Buffer is a temporary that only exists while bytearray() reads it.
+    import gc
+
+    def _bytearray_from_temp_buffer(bytes_obj):
+        return bytearray(io.buffer_from_bytes(bytes_obj))
+    copied = _bytearray_from_temp_buffer(b'foo')
+    gc.collect()
+    assert copied == b'foo'
+
+
 
 # ----------------------------------------------------------------------
 # OS files and memory maps