You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/07/17 15:39:29 UTC
[arrow] branch master updated: ARROW-564: [Python] Add Array.to_numpy()
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8d8645c ARROW-564: [Python] Add Array.to_numpy()
8d8645c is described below
commit 8d8645ceaf41c53636865162478c699c4eab8086
Author: Florian Rathgeber <fl...@gmail.com>
AuthorDate: Tue Jul 17 17:39:12 2018 +0200
ARROW-564: [Python] Add Array.to_numpy()
Author: Florian Rathgeber <fl...@gmail.com>
Author: Antoine Pitrou <pi...@free.fr>
Closes #1931 from kynan/ARROW-564 and squashes the following commits:
2f14cb2 <Antoine Pitrou> Make assertion stricter
79d6877 <Florian Rathgeber> ARROW-564: Address code review comments
3ec8a36 <Florian Rathgeber> ARROW-564: Refactor test_to_numpy_zero_copy
2672e82 <Florian Rathgeber> ARROW-564: Add support for return zero copy NumPy arrays
---
python/pyarrow/array.pxi | 15 +++++++++++++
python/pyarrow/tests/test_array.py | 46 ++++++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index de59509..c2eb870 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -604,6 +604,21 @@ cdef class Array:
self, &out))
return wrap_array_output(out)
+ def to_numpy(self):
+ """
+ Construct a NumPy view of this array
+ """
+ if self.null_count:
+ raise NotImplementedError('NumPy array view is only supported '
+ 'for arrays without nulls.')
+ if not is_primitive(self.type.id):
+ raise NotImplementedError('NumPy array view is only supported '
+ 'for primitive types.')
+ buflist = self.buffers()
+ assert len(buflist) == 2
+ return np.frombuffer(buflist[-1], dtype=self.type.to_pandas_dtype())[
+ self.offset:self.offset + len(self)]
+
def to_pylist(self):
"""
Convert to an list of native Python objects.
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 3852211..29583e8 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -87,6 +87,27 @@ def test_long_array_format():
assert result == expected
+def test_to_numpy_zero_copy():
+ arr = pa.array(range(10))
+ old_refcount = sys.getrefcount(arr)
+
+ np_arr = arr.to_numpy()
+ np_arr[0] = 1
+ assert arr[0] == 1
+
+ assert sys.getrefcount(arr) == old_refcount
+
+ arr = None
+ import gc
+ gc.collect()
+
+ # Ensure base is still valid
+ assert np_arr.base is not None
+ expected = np.arange(10)
+ expected[0] = 1
+ np.testing.assert_array_equal(np_arr, expected)
+
+
def test_to_pandas_zero_copy():
import gc
@@ -618,6 +639,31 @@ def test_array_pickle(data, typ):
@pytest.mark.parametrize(
+ 'narr',
+ [
+ np.arange(10, dtype=np.int64),
+ np.arange(10, dtype=np.int32),
+ np.arange(10, dtype=np.int16),
+ np.arange(10, dtype=np.int8),
+ np.arange(10, dtype=np.uint64),
+ np.arange(10, dtype=np.uint32),
+ np.arange(10, dtype=np.uint16),
+ np.arange(10, dtype=np.uint8),
+ np.arange(10, dtype=np.float64),
+ np.arange(10, dtype=np.float32),
+ np.arange(10, dtype=np.float16),
+ ]
+)
+def test_to_numpy_roundtrip(narr):
+ arr = pa.array(narr)
+ assert narr.dtype == arr.to_numpy().dtype
+ np.testing.assert_array_equal(narr, arr.to_numpy())
+ np.testing.assert_array_equal(narr[:6], arr[:6].to_numpy())
+ np.testing.assert_array_equal(narr[2:], arr[2:].to_numpy())
+ np.testing.assert_array_equal(narr[2:6], arr[2:6].to_numpy())
+
+
+@pytest.mark.parametrize(
('type', 'expected'),
[
(pa.null(), 'empty'),