You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/12/19 16:47:39 UTC

arrow git commit: ARROW-426: Python: Conversion from pyarrow.Array to a Python list

Repository: arrow
Updated Branches:
  refs/heads/master a2ead2f64 -> c369709c4


ARROW-426: Python: Conversion from pyarrow.Array to a Python list

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #242 from xhochy/ARROW-426 and squashes the following commits:

10739ac [Uwe L. Korn] ARROW-426: Python: Conversion from pyarrow.Array to a Python list


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c369709c
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c369709c
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c369709c

Branch: refs/heads/master
Commit: c369709c4f8157cb5e6c8121e1e613b104305aed
Parents: a2ead2f
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Mon Dec 19 11:47:32 2016 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Dec 19 11:47:32 2016 -0500

----------------------------------------------------------------------
 python/pyarrow/array.pyx                     |  6 ++++++
 python/pyarrow/scalar.pyx                    |  4 +++-
 python/pyarrow/table.pyx                     | 15 +++++++++++++++
 python/pyarrow/tests/test_column.py          |  1 +
 python/pyarrow/tests/test_convert_builtin.py | 13 +++++++++++--
 5 files changed, 36 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index 6c86275..d44212f 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -167,6 +167,12 @@ cdef class Array:
 
         return PyObject_to_object(np_arr)
 
+    def to_pylist(self):
+        """
+        Convert to a list of native Python objects.
+        """
+        return [x.as_py() for x in self]
+
 
 cdef class NullArray(Array):
     pass

http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 0d391e5..c2d20e4 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -194,7 +194,9 @@ cdef object box_arrow_scalar(DataType type,
                              const shared_ptr[CArray]& sp_array,
                              int index):
     cdef ArrayValue val
-    if sp_array.get().IsNull(index):
+    if type.type.type == Type_NA:
+        return NA
+    elif sp_array.get().IsNull(index):
         return NA
     else:
         val = _scalar_classes[type.type.type]()

http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/table.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx
index 333686f..2f7d430 100644
--- a/python/pyarrow/table.pyx
+++ b/python/pyarrow/table.pyx
@@ -108,6 +108,15 @@ cdef class ChunkedArray:
         for i in range(self.num_chunks):
             yield self.chunk(i)
 
+    def to_pylist(self):
+        """
+        Convert to a list of native Python objects.
+        """
+        result = []
+        for i in range(self.num_chunks):
+            result += self.chunk(i).to_pylist()
+        return result
+
 
 cdef class Column:
     """
@@ -143,6 +152,12 @@ cdef class Column:
 
         return pd.Series(PyObject_to_object(arr), name=self.name)
 
+    def to_pylist(self):
+        """
+        Convert to a list of native Python objects.
+        """
+        return self.data.to_pylist()
+
     cdef _check_nullptr(self):
         if self.column == NULL:
             raise ReferenceError("Column object references a NULL pointer."

http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/tests/test_column.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_column.py b/python/pyarrow/tests/test_column.py
index b62f582..32202cb 100644
--- a/python/pyarrow/tests/test_column.py
+++ b/python/pyarrow/tests/test_column.py
@@ -35,6 +35,7 @@ class TestColumn(unittest.TestCase):
         assert column.length() == 5
         assert len(column) == 5
         assert column.shape == (5,)
+        assert column.to_pylist() == [-10, -5, 0, 5, 10]
 
     def test_pandas(self):
         data = [

http://git-wip-us.apache.org/repos/asf/arrow/blob/c369709c/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 8937f8d..34371b0 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -22,28 +22,34 @@ import pyarrow
 class TestConvertList(unittest.TestCase):
 
     def test_boolean(self):
-        arr = pyarrow.from_pylist([True, None, False, None])
+        expected = [True, None, False, None]
+        arr = pyarrow.from_pylist(expected)
         assert len(arr) == 4
         assert arr.null_count == 2
         assert arr.type == pyarrow.bool_()
+        assert arr.to_pylist() == expected
 
     def test_empty_list(self):
         arr = pyarrow.from_pylist([])
         assert len(arr) == 0
         assert arr.null_count == 0
         assert arr.type == pyarrow.null()
+        assert arr.to_pylist() == []
 
     def test_all_none(self):
         arr = pyarrow.from_pylist([None, None])
         assert len(arr) == 2
         assert arr.null_count == 2
         assert arr.type == pyarrow.null()
+        assert arr.to_pylist() == [None, None]
 
     def test_integer(self):
-        arr = pyarrow.from_pylist([1, None, 3, None])
+        expected = [1, None, 3, None]
+        arr = pyarrow.from_pylist(expected)
         assert len(arr) == 4
         assert arr.null_count == 2
         assert arr.type == pyarrow.int64()
+        assert arr.to_pylist() == expected
 
     def test_garbage_collection(self):
         import gc
@@ -62,6 +68,7 @@ class TestConvertList(unittest.TestCase):
         assert len(arr) == 6
         assert arr.null_count == 3
         assert arr.type == pyarrow.double()
+        assert arr.to_pylist() == data
 
     def test_string(self):
         data = ['foo', b'bar', None, 'arrow']
@@ -69,6 +76,7 @@ class TestConvertList(unittest.TestCase):
         assert len(arr) == 4
         assert arr.null_count == 1
         assert arr.type == pyarrow.string()
+        assert arr.to_pylist() == ['foo', 'bar', None, 'arrow']
 
     def test_mixed_nesting_levels(self):
         pyarrow.from_pylist([1, 2, None])
@@ -90,3 +98,4 @@ class TestConvertList(unittest.TestCase):
         assert len(arr) == 4
         assert arr.null_count == 1
         assert arr.type == pyarrow.list_(pyarrow.int64())
+        assert arr.to_pylist() == data