You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/07/17 15:39:29 UTC

[arrow] branch master updated: ARROW-564: [Python] Add Array.to_numpy()

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d8645c  ARROW-564: [Python] Add Array.to_numpy()
8d8645c is described below

commit 8d8645ceaf41c53636865162478c699c4eab8086
Author: Florian Rathgeber <fl...@gmail.com>
AuthorDate: Tue Jul 17 17:39:12 2018 +0200

    ARROW-564: [Python] Add Array.to_numpy()
    
    Author: Florian Rathgeber <fl...@gmail.com>
    Author: Antoine Pitrou <pi...@free.fr>
    
    Closes #1931 from kynan/ARROW-564 and squashes the following commits:
    
    2f14cb2 <Antoine Pitrou> Make assertion stricter
    79d6877 <Florian Rathgeber> ARROW-564:  Address code review comments
    3ec8a36 <Florian Rathgeber> ARROW-564:  Refactor test_to_numpy_zero_copy
    2672e82 <Florian Rathgeber> ARROW-564:  Add support for return zero copy NumPy arrays
---
 python/pyarrow/array.pxi           | 15 +++++++++++++
 python/pyarrow/tests/test_array.py | 46 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index de59509..c2eb870 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -604,6 +604,21 @@ cdef class Array:
                                               self, &out))
         return wrap_array_output(out)
 
+    def to_numpy(self):
+        """
+        Construct a zero-copy NumPy view (primitive types, no nulls only)
+        """
+        if self.null_count:
+            raise NotImplementedError('NumPy array view is only supported '
+                                      'for arrays without nulls.')
+        if not is_primitive(self.type.id):
+            raise NotImplementedError('NumPy array view is only supported '
+                                      'for primitive types.')
+        buflist = self.buffers()
+        assert len(buflist) == 2
+        return np.frombuffer(buflist[-1], dtype=self.type.to_pandas_dtype())[
+            self.offset:self.offset + len(self)]
+
     def to_pylist(self):
         """
         Convert to an list of native Python objects.
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 3852211..29583e8 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -87,6 +87,27 @@ def test_long_array_format():
     assert result == expected
 
 
+def test_to_numpy_zero_copy():
+    arr = pa.array(range(10))
+    old_refcount = sys.getrefcount(arr)
+
+    np_arr = arr.to_numpy()
+    np_arr[0] = 1
+    assert arr[0] == 1  # write through the view is visible in arr
+
+    assert sys.getrefcount(arr) == old_refcount  # no new reference to arr
+
+    arr = None
+    import gc
+    gc.collect()
+
+    # The view must remain usable (base kept) after arr has been released
+    assert np_arr.base is not None
+    expected = np.arange(10)
+    expected[0] = 1
+    np.testing.assert_array_equal(np_arr, expected)
+
+
 def test_to_pandas_zero_copy():
     import gc
 
@@ -618,6 +639,31 @@ def test_array_pickle(data, typ):
 
 
 @pytest.mark.parametrize(
+    'narr',  # one input array per integer and floating-point dtype
+    [
+        np.arange(10, dtype=np.int64),
+        np.arange(10, dtype=np.int32),
+        np.arange(10, dtype=np.int16),
+        np.arange(10, dtype=np.int8),
+        np.arange(10, dtype=np.uint64),
+        np.arange(10, dtype=np.uint32),
+        np.arange(10, dtype=np.uint16),
+        np.arange(10, dtype=np.uint8),
+        np.arange(10, dtype=np.float64),
+        np.arange(10, dtype=np.float32),
+        np.arange(10, dtype=np.float16),
+    ]
+)
+def test_to_numpy_roundtrip(narr):
+    arr = pa.array(narr)
+    assert narr.dtype == arr.to_numpy().dtype  # dtype survives round-trip
+    np.testing.assert_array_equal(narr, arr.to_numpy())
+    np.testing.assert_array_equal(narr[:6], arr[:6].to_numpy())
+    np.testing.assert_array_equal(narr[2:], arr[2:].to_numpy())
+    np.testing.assert_array_equal(narr[2:6], arr[2:6].to_numpy())
+
+
+@pytest.mark.parametrize(
     ('type', 'expected'),
     [
         (pa.null(), 'empty'),