You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/07/19 14:34:22 UTC

[arrow-rs] branch master updated: feat: Support `FixedSizedListArray` for `length` kernel (#4520)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 730941f09a feat: Support `FixedSizedListArray` for `length` kernel (#4520)
730941f09a is described below

commit 730941f09a88abfdce6e8c3f24cb57e5a97859ad
Author: Alex Huang <hu...@gmail.com>
AuthorDate: Wed Jul 19 22:34:16 2023 +0800

    feat: Support `FixedSizedListArray` for `length` kernel (#4520)
    
    * feat: Support FixedSizedListArray for length kernel
    
    * fix clippy
    
    * update comment
    
    * avoid unsafe
    
    * reduce useless trait
    
    * remove T
---
 arrow-string/src/length.rs | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs
index 90efdd7b67..25d6414ec8 100644
--- a/arrow-string/src/length.rs
+++ b/arrow-string/src/length.rs
@@ -17,8 +17,8 @@
 
 //! Defines kernel for length of string arrays and binary arrays
 
-use arrow_array::types::*;
 use arrow_array::*;
+use arrow_array::{cast::AsArray, types::*};
 use arrow_buffer::Buffer;
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
@@ -88,6 +88,14 @@ where
     unary_offsets!(array, T::DATA_TYPE, |x| x)
 }
 
+fn length_list_fixed_size(array: &dyn Array, length: i32) -> ArrayRef {
+    let array = array.as_fixed_size_list();
+    let length_list = array.len();
+    let buffer = Buffer::from_vec(vec![length; length_list]);
+    let data = Int32Array::new(buffer.into(), array.nulls().cloned());
+    Arc::new(data)
+}
+
 fn length_binary<O, T>(array: &dyn Array) -> ArrayRef
 where
     O: OffsetSizeTrait,
@@ -146,7 +154,7 @@ where
 /// For list array, length is the number of elements in each list.
 /// For string array and binary array, length is the number of bytes of each value.
 ///
-/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray and BinaryArray/LargeBinaryArray,
+/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray, BinaryArray/LargeBinaryArray, and FixedSizeListArray,
 ///   or DictionaryArray with above Arrays as values
 /// * length of null is null.
 pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
@@ -172,6 +180,7 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
         DataType::LargeUtf8 => Ok(length_string::<i64, Int64Type>(array)),
         DataType::Binary => Ok(length_binary::<i32, Int32Type>(array)),
         DataType::LargeBinary => Ok(length_binary::<i64, Int64Type>(array)),
+        DataType::FixedSizeList(_, len) => Ok(length_list_fixed_size(array, *len)),
         other => Err(ArrowError::ComputeError(format!(
             "length not supported for {other:?}"
         ))),
@@ -215,6 +224,8 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
 mod tests {
     use super::*;
     use arrow_array::cast::AsArray;
+    use arrow_buffer::NullBuffer;
+    use arrow_schema::Field;
 
     fn double_vec<T: Clone>(v: Vec<T>) -> Vec<T> {
         [&v[..], &v[..]].concat()
@@ -696,4 +707,34 @@ mod tests {
             assert_eq!(expected[i], actual[i],);
         }
     }
+
+    #[test]
+    fn test_fixed_size_list_length() {
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int32)
+            .len(9)
+            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
+            .build()
+            .unwrap();
+        let list_data_type = DataType::FixedSizeList(
+            Arc::new(Field::new("item", DataType::Int32, false)),
+            3,
+        );
+        let nulls = NullBuffer::from(vec![true, false, true]);
+        let list_data = ArrayData::builder(list_data_type)
+            .len(3)
+            .add_child_data(value_data)
+            .nulls(Some(nulls))
+            .build()
+            .unwrap();
+        let list_array = FixedSizeListArray::from(list_data);
+
+        let lengths = length(&list_array).unwrap();
+        let lengths = lengths.as_any().downcast_ref::<Int32Array>().unwrap();
+
+        assert_eq!(lengths.len(), 3);
+        assert_eq!(lengths.value(0), 3);
+        assert!(lengths.is_null(1));
+        assert_eq!(lengths.value(2), 3);
+    }
 }