You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2024/01/10 17:07:31 UTC
(arrow) branch main updated: GH-39515: [Python] Pass in type to `MapType.from_arrays` (#39516)
This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 07a46555e7 GH-39515: [Python] Pass in type to `MapType.from_arrays` (#39516)
07a46555e7 is described below
commit 07a46555e74501f96973dc43ef54a4669d261876
Author: Fokko Driesprong <fo...@tabular.io>
AuthorDate: Wed Jan 10 09:07:24 2024 -0800
GH-39515: [Python] Pass in type to `MapType.from_arrays` (#39516)
### Rationale for this change
For Iceberg we want to add metadata to the type (the field-id), therefore we need to pass in the type, analogous to what we do for `ListArray.from_arrays(self, offsets, values, DataType type=None, MemoryPool pool=None, mask=None)`.
### What changes are included in this PR?
Added a keyword argument for the `type`, and made sure that the static method to create the MapType is exposed from the C++ side.
### Are these changes tested?
I've added a simple test.
### Are there any user-facing changes?
* Closes: #39515
Authored-by: Fokko Driesprong <fo...@tabular.io>
Signed-off-by: AlenkaF <fr...@gmail.com>
---
python/pyarrow/array.pxi | 21 +++++++++++++++------
python/pyarrow/includes/libarrow.pxd | 8 ++++++++
python/pyarrow/tests/test_array.py | 19 ++++++++++++++++++-
3 files changed, 41 insertions(+), 7 deletions(-)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 751dfbcce4..5c2d22aef1 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -2467,7 +2467,7 @@ cdef class MapArray(ListArray):
"""
@staticmethod
- def from_arrays(offsets, keys, items, MemoryPool pool=None):
+ def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None):
"""
Construct MapArray from arrays of int32 offsets and key, item arrays.
@@ -2476,6 +2476,8 @@ cdef class MapArray(ListArray):
offsets : array-like or sequence (int32 type)
keys : array-like or sequence (any type)
items : array-like or sequence (any type)
+ type : DataType, optional
+ If not specified, a default MapArray with the keys' and items' type is used.
pool : MemoryPool
Returns
@@ -2564,11 +2566,18 @@ cdef class MapArray(ListArray):
_keys = asarray(keys)
_items = asarray(items)
- with nogil:
- out = GetResultValue(
- CMapArray.FromArrays(_offsets.sp_array,
- _keys.sp_array,
- _items.sp_array, cpool))
+ if type is not None:
+ with nogil:
+ out = GetResultValue(
+ CMapArray.FromArraysAndType(
+ type.sp_type, _offsets.sp_array,
+ _keys.sp_array, _items.sp_array, cpool))
+ else:
+ with nogil:
+ out = GetResultValue(
+ CMapArray.FromArrays(_offsets.sp_array,
+ _keys.sp_array,
+ _items.sp_array, cpool))
cdef Array result = pyarrow_wrap_array(out)
result.validate()
return result
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 82b888f584..74e92594b0 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -696,6 +696,14 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
const shared_ptr[CArray]& items,
CMemoryPool* pool)
+ @staticmethod
+ CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"(
+ shared_ptr[CDataType],
+ const shared_ptr[CArray]& offsets,
+ const shared_ptr[CArray]& keys,
+ const shared_ptr[CArray]& items,
+ CMemoryPool* pool)
+
shared_ptr[CArray] keys()
shared_ptr[CArray] items()
CMapType* map_type()
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 3dcbf399f3..f851d4e0b6 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1057,8 +1057,25 @@ def test_map_from_arrays():
assert result.equals(expected)
- # check invalid usage
+ # pass in the type explicitly
+ result = pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+ keys.type,
+ items.type
+ ))
+ assert result.equals(expected)
+
+ # pass in invalid types
+ with pytest.raises(pa.ArrowTypeError, match='Expected map type, got string'):
+ pa.MapArray.from_arrays(offsets, keys, items, pa.string())
+ with pytest.raises(pa.ArrowTypeError, match='Mismatching map items type'):
+ pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+ keys.type,
+ # Larger than the original i4
+ pa.int64()
+ ))
+
+ # check invalid usage
offsets = [0, 1, 3, 5]
keys = np.arange(5)
items = np.arange(5)