You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2024/01/10 17:07:31 UTC

(arrow) branch main updated: GH-39515: [Python] Pass in type to `MapType.from_arrays` (#39516)

This is an automated email from the ASF dual-hosted git repository.

alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 07a46555e7 GH-39515: [Python] Pass in type to `MapType.from_arrays` (#39516)
07a46555e7 is described below

commit 07a46555e74501f96973dc43ef54a4669d261876
Author: Fokko Driesprong <fo...@tabular.io>
AuthorDate: Wed Jan 10 09:07:24 2024 -0800

    GH-39515: [Python] Pass in type to `MapType.from_arrays` (#39516)
    
    
    
    ### Rationale for this change
    
    For Iceberg we want to add metadata to the type (the field-id), therefore we need to pass in the type, analogous to what we do for `ListArray.from_arrays(self, offsets, values, DataType type=None, MemoryPool pool=None, mask=None)`.
    
    ### What changes are included in this PR?
    
    Added a keyword argument for the `type`, and made sure that the static method to create the MapArray with that type is exposed from the cpp side.
    
    ### Are these changes tested?
    
    I've added a simple test.
    
    ### Are there any user-facing changes?
    
    * Closes: #39515
    
    Authored-by: Fokko Driesprong <fo...@tabular.io>
    Signed-off-by: AlenkaF <fr...@gmail.com>
---
 python/pyarrow/array.pxi             | 21 +++++++++++++++------
 python/pyarrow/includes/libarrow.pxd |  8 ++++++++
 python/pyarrow/tests/test_array.py   | 19 ++++++++++++++++++-
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 751dfbcce4..5c2d22aef1 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -2467,7 +2467,7 @@ cdef class MapArray(ListArray):
     """
 
     @staticmethod
-    def from_arrays(offsets, keys, items, MemoryPool pool=None):
+    def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None):
         """
         Construct MapArray from arrays of int32 offsets and key, item arrays.
 
@@ -2476,6 +2476,8 @@ cdef class MapArray(ListArray):
         offsets : array-like or sequence (int32 type)
         keys : array-like or sequence (any type)
         items : array-like or sequence (any type)
+        type : DataType, optional
+            If not specified, a default MapArray with the keys' and items' type is used.
         pool : MemoryPool
 
         Returns
@@ -2564,11 +2566,18 @@ cdef class MapArray(ListArray):
         _keys = asarray(keys)
         _items = asarray(items)
 
-        with nogil:
-            out = GetResultValue(
-                CMapArray.FromArrays(_offsets.sp_array,
-                                     _keys.sp_array,
-                                     _items.sp_array, cpool))
+        if type is not None:
+            with nogil:
+                out = GetResultValue(
+                    CMapArray.FromArraysAndType(
+                        type.sp_type, _offsets.sp_array,
+                        _keys.sp_array, _items.sp_array, cpool))
+        else:
+            with nogil:
+                out = GetResultValue(
+                    CMapArray.FromArrays(_offsets.sp_array,
+                                         _keys.sp_array,
+                                         _items.sp_array, cpool))
         cdef Array result = pyarrow_wrap_array(out)
         result.validate()
         return result
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 82b888f584..74e92594b0 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -696,6 +696,14 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
             const shared_ptr[CArray]& items,
             CMemoryPool* pool)
 
+        @staticmethod
+        CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"(
+            shared_ptr[CDataType],
+            const shared_ptr[CArray]& offsets,
+            const shared_ptr[CArray]& keys,
+            const shared_ptr[CArray]& items,
+            CMemoryPool* pool)
+
         shared_ptr[CArray] keys()
         shared_ptr[CArray] items()
         CMapType* map_type()
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 3dcbf399f3..f851d4e0b6 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1057,8 +1057,25 @@ def test_map_from_arrays():
 
     assert result.equals(expected)
 
-    # check invalid usage
+    # pass in the type explicitly
+    result = pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+        keys.type,
+        items.type
+    ))
+    assert result.equals(expected)
+
+    # pass in invalid types
+    with pytest.raises(pa.ArrowTypeError, match='Expected map type, got string'):
+        pa.MapArray.from_arrays(offsets, keys, items, pa.string())
 
+    with pytest.raises(pa.ArrowTypeError, match='Mismatching map items type'):
+        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+            keys.type,
+            # Larger than the original i4
+            pa.int64()
+        ))
+
+    # check invalid usage
     offsets = [0, 1, 3, 5]
     keys = np.arange(5)
     items = np.arange(5)