You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/05/14 15:47:16 UTC

[GitHub] [arrow] lidavidm commented on a change in pull request #10272: ARROW-12677: [Python] Add a mask argument to pyarrow.StructArray.from_arrays

lidavidm commented on a change in pull request #10272:
URL: https://github.com/apache/arrow/pull/10272#discussion_r632622802



##########
File path: python/pyarrow/tests/test_array.py
##########
@@ -932,6 +954,24 @@ def test_fixed_size_list_from_arrays():
         pa.FixedSizeListArray.from_arrays(values, 5)
 
 
+def test_variable_list_from_arrays():
+    values = pa.array([1, 2, 3, 4], pa.int64())
+    offsets = pa.array([0, 2, 4])
+    result = pa.ListArray.from_arrays(offsets, values)
+    assert result.to_pylist() == [[1, 2], [3, 4]]
+    assert result.type.equals(pa.list_(pa.int64()))
+
+    offsets = pa.array([0, None, 2, 4])
+    result = pa.ListArray.from_arrays(offsets, values)
+    assert result.to_pylist() == [[1, 2], None, [3, 4]]
+
+    # raise if offset out of bounds
+    with pytest.raises(ValueError):
+        pa.ListArray.from_arrays(pa.array([-1, 2, 4]), values)
+
+    with pytest.raises(ValueError):
+        pa.ListArray.from_arrays(pa.array([0, 2, 5]), values)
+

Review comment:
       ```suggestion
   
   
   ```

##########
File path: python/pyarrow/array.pxi
##########
@@ -2189,6 +2227,18 @@ cdef class StructArray(Array):
         if names is not None and fields is not None:
             raise ValueError('Must pass either names or fields, not both')
 
+        if mask is None:
+            c_mask = shared_ptr[CBuffer]()
+        elif isinstance(mask, Array):
+            if mask.type != bool_():

Review comment:
       nit: maybe use `pa.types.is_boolean(mask.type)` here instead of comparing against `bool_()`?

##########
File path: python/pyarrow/array.pxi
##########
@@ -2189,6 +2227,18 @@ cdef class StructArray(Array):
         if names is not None and fields is not None:
             raise ValueError('Must pass either names or fields, not both')
 
+        if mask is None:
+            c_mask = shared_ptr[CBuffer]()
+        elif isinstance(mask, Array):
+            if mask.type != bool_():
+                raise ValueError('Mask must be a pyarray.Array of type bool')
+            if mask.null_count != 0:
+                raise ValueError('Mask must not contain nulls')
+            inverted_mask = _pc().invert(mask, memory_pool=memory_pool)
+            c_mask = pyarrow_unwrap_buffer(inverted_mask.buffers()[1])
+        else:
+            raise ValueError('Mask must be a pyarray.Array of type bool')

Review comment:
       ```suggestion
               raise ValueError('Mask must be a pyarrow.Array of type bool')
   ```

##########
File path: python/pyarrow/array.pxi
##########
@@ -2189,6 +2227,18 @@ cdef class StructArray(Array):
         if names is not None and fields is not None:
             raise ValueError('Must pass either names or fields, not both')
 
+        if mask is None:
+            c_mask = shared_ptr[CBuffer]()
+        elif isinstance(mask, Array):
+            if mask.type != bool_():
+                raise ValueError('Mask must be a pyarray.Array of type bool')
+            if mask.null_count != 0:
+                raise ValueError('Mask must not contain nulls')
+            inverted_mask = _pc().invert(mask, memory_pool=memory_pool)
+            c_mask = pyarrow_unwrap_buffer(inverted_mask.buffers()[1])
+        else:
+            raise ValueError('Mask must be a pyarray.Array of type bool')

Review comment:
       nit: maybe also include a hint such as `'(expected pyarrow.Array of type bool, got {type(mask)})'` in the error message (this is semi-consistently done in PyArrow)

##########
File path: python/pyarrow/array.pxi
##########
@@ -2189,6 +2227,18 @@ cdef class StructArray(Array):
         if names is not None and fields is not None:
             raise ValueError('Must pass either names or fields, not both')
 
+        if mask is None:
+            c_mask = shared_ptr[CBuffer]()
+        elif isinstance(mask, Array):
+            if mask.type != bool_():
+                raise ValueError('Mask must be a pyarray.Array of type bool')

Review comment:
       ```suggestion
                   raise ValueError('Mask must be a pyarrow.Array of type bool')
   ```

##########
File path: python/pyarrow/tests/test_array.py
##########
@@ -932,6 +954,24 @@ def test_fixed_size_list_from_arrays():
         pa.FixedSizeListArray.from_arrays(values, 5)
 
 
+def test_variable_list_from_arrays():
+    values = pa.array([1, 2, 3, 4], pa.int64())
+    offsets = pa.array([0, 2, 4])
+    result = pa.ListArray.from_arrays(offsets, values)
+    assert result.to_pylist() == [[1, 2], [3, 4]]
+    assert result.type.equals(pa.list_(pa.int64()))
+
+    offsets = pa.array([0, None, 2, 4])
+    result = pa.ListArray.from_arrays(offsets, values)
+    assert result.to_pylist() == [[1, 2], None, [3, 4]]
+
+    # raise if offset out of bounds
+    with pytest.raises(ValueError):
+        pa.ListArray.from_arrays(pa.array([-1, 2, 4]), values)
+
+    with pytest.raises(ValueError):
+        pa.ListArray.from_arrays(pa.array([0, 2, 5]), values)
+

Review comment:
       (just to fix the lint error)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org