You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/07/19 14:34:22 UTC
[arrow-rs] branch master updated: feat: Support `FixedSizedListArray` for `length` kernel (#4520)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 730941f09a feat: Support `FixedSizedListArray` for `length` kernel (#4520)
730941f09a is described below
commit 730941f09a88abfdce6e8c3f24cb57e5a97859ad
Author: Alex Huang <hu...@gmail.com>
AuthorDate: Wed Jul 19 22:34:16 2023 +0800
feat: Support `FixedSizedListArray` for `length` kernel (#4520)
* feat: Support FixedSizedListArray for length kernel
* fix clippy
* update comment
* avoid unsafe
* reduce useless trait
* remove T
---
arrow-string/src/length.rs | 45 +++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 43 insertions(+), 2 deletions(-)
diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs
index 90efdd7b67..25d6414ec8 100644
--- a/arrow-string/src/length.rs
+++ b/arrow-string/src/length.rs
@@ -17,8 +17,8 @@
//! Defines kernel for length of string arrays and binary arrays
-use arrow_array::types::*;
use arrow_array::*;
+use arrow_array::{cast::AsArray, types::*};
use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
@@ -88,6 +88,14 @@ where
unary_offsets!(array, T::DATA_TYPE, |x| x)
}
+fn length_list_fixed_size(array: &dyn Array, length: i32) -> ArrayRef {
+ let array = array.as_fixed_size_list();
+ let length_list = array.len();
+ let buffer = Buffer::from_vec(vec![length; length_list]);
+ let data = Int32Array::new(buffer.into(), array.nulls().cloned());
+ Arc::new(data)
+}
+
fn length_binary<O, T>(array: &dyn Array) -> ArrayRef
where
O: OffsetSizeTrait,
@@ -146,7 +154,7 @@ where
/// For list array, length is the number of elements in each list.
/// For string array and binary array, length is the number of bytes of each value.
///
-/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray and BinaryArray/LargeBinaryArray,
+/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray, BinaryArray/LargeBinaryArray, and FixedSizeListArray,
/// or DictionaryArray with above Arrays as values
/// * length of null is null.
pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
@@ -172,6 +180,7 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
DataType::LargeUtf8 => Ok(length_string::<i64, Int64Type>(array)),
DataType::Binary => Ok(length_binary::<i32, Int32Type>(array)),
DataType::LargeBinary => Ok(length_binary::<i64, Int64Type>(array)),
+ DataType::FixedSizeList(_, len) => Ok(length_list_fixed_size(array, *len)),
other => Err(ArrowError::ComputeError(format!(
"length not supported for {other:?}"
))),
@@ -215,6 +224,8 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
mod tests {
use super::*;
use arrow_array::cast::AsArray;
+ use arrow_buffer::NullBuffer;
+ use arrow_schema::Field;
fn double_vec<T: Clone>(v: Vec<T>) -> Vec<T> {
[&v[..], &v[..]].concat()
@@ -696,4 +707,34 @@ mod tests {
assert_eq!(expected[i], actual[i],);
}
}
+
+ #[test]
+ fn test_fixed_size_list_length() {
+ // Construct a value array
+ let value_data = ArrayData::builder(DataType::Int32)
+ .len(9)
+ .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
+ .build()
+ .unwrap();
+ let list_data_type = DataType::FixedSizeList(
+ Arc::new(Field::new("item", DataType::Int32, false)),
+ 3,
+ );
+ let nulls = NullBuffer::from(vec![true, false, true]);
+ let list_data = ArrayData::builder(list_data_type)
+ .len(3)
+ .add_child_data(value_data)
+ .nulls(Some(nulls))
+ .build()
+ .unwrap();
+ let list_array = FixedSizeListArray::from(list_data);
+
+ let lengths = length(&list_array).unwrap();
+ let lengths = lengths.as_any().downcast_ref::<Int32Array>().unwrap();
+
+ assert_eq!(lengths.len(), 3);
+ assert_eq!(lengths.value(0), 3);
+ assert!(lengths.is_null(1));
+ assert_eq!(lengths.value(2), 3);
+ }
}