You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/23 07:49:08 UTC
[arrow-rs] branch master updated: Casting fixedsizelist to list/largelist (#4433)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new a3b5b1933 Casting fixedsizelist to list/largelist (#4433)
a3b5b1933 is described below
commit a3b5b193380cc4fbafa823e5b0c998b5f8e5cae3
Author: Jay Zhan <ja...@gmail.com>
AuthorDate: Fri Jun 23 15:49:02 2023 +0800
Casting fixedsizelist to list/largelist (#4433)
* fixedsizelist to list/largelist
Signed-off-by: jayzhan211 <ja...@gmail.com>
* address comment
Signed-off-by: jayzhan211 <ja...@gmail.com>
* remove typehint
Signed-off-by: jayzhan211 <ja...@gmail.com>
* address ci
Signed-off-by: jayzhan211 <ja...@gmail.com>
* address clippy
Signed-off-by: jayzhan211 <ja...@gmail.com>
---------
Signed-off-by: jayzhan211 <ja...@gmail.com>
---
arrow-array/src/cast.rs | 17 ++++++
arrow-cast/src/cast.rs | 141 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 158 insertions(+)
diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs
index e92e19eb3..bee8823d1 100644
--- a/arrow-array/src/cast.rs
+++ b/arrow-array/src/cast.rs
@@ -799,6 +799,15 @@ pub trait AsArray: private::Sealed {
self.as_list_opt().expect("list array")
}
+ /// Downcast this to a [`FixedSizeListArray`] returning `None` if not possible
+ fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray>;
+
+ /// Downcast this to a [`FixedSizeListArray`] panicking if not possible
+ fn as_fixed_size_list(&self) -> &FixedSizeListArray {
+ self.as_fixed_size_list_opt()
+ .expect("fixed size list array")
+ }
+
/// Downcast this to a [`MapArray`] returning `None` if not possible
fn as_map_opt(&self) -> Option<&MapArray>;
@@ -839,6 +848,10 @@ impl AsArray for dyn Array + '_ {
self.as_any().downcast_ref()
}
+ fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray> {
+ self.as_any().downcast_ref()
+ }
+
fn as_map_opt(&self) -> Option<&MapArray> {
self.as_any().downcast_ref()
}
@@ -872,6 +885,10 @@ impl AsArray for ArrayRef {
self.as_ref().as_list_opt()
}
+ fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray> {
+ self.as_ref().as_fixed_size_list_opt()
+ }
+
fn as_map_opt(&self) -> Option<&MapArray> {
self.as_any().downcast_ref()
}
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index dea3f2acf..95c0a63a3 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -141,6 +141,12 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
can_cast_types(list_from.data_type(), to_type)
}
(List(_), _) => false,
+ (FixedSizeList(list_from,_), List(list_to)) => {
+ list_from.data_type() == list_to.data_type()
+ }
+ (FixedSizeList(list_from,_), LargeList(list_to)) => {
+ list_from.data_type() == list_to.data_type()
+ }
(_, List(list_to)) => can_cast_types(from_type, list_to.data_type()),
(_, LargeList(list_to)) => can_cast_types(from_type, list_to.data_type()),
// cast one decimal type to another decimal type
@@ -824,6 +830,25 @@ pub fn cast_with_options(
"Cannot cast list to non-list data types".to_string(),
)),
},
+ (FixedSizeList(list_from, _), List(list_to)) => {
+ if list_to.data_type() != list_from.data_type() {
+ Err(ArrowError::CastError(
+ "cannot cast fixed-size-list to list with different child data".into(),
+ ))
+ } else {
+ cast_fixed_size_list_to_list::<i32>(array)
+ }
+ }
+ (FixedSizeList(list_from, _), LargeList(list_to)) => {
+ if list_to.data_type() != list_from.data_type() {
+ Err(ArrowError::CastError(
+ "cannot cast fixed-size-list to largelist with different child data".into(),
+ ))
+ } else {
+ cast_fixed_size_list_to_list::<i64>(array)
+ }
+ }
+
(_, List(ref to)) => {
cast_primitive_to_list::<i32>(array, to, to_type, cast_options)
}
@@ -3822,6 +3847,17 @@ where
Ok(Arc::new(GenericByteArray::<TO>::from(array_data)))
}
+fn cast_fixed_size_list_to_list<OffsetSize>(
+ array: &dyn Array,
+) -> Result<ArrayRef, ArrowError>
+where
+ OffsetSize: OffsetSizeTrait,
+{
+ let fixed_size_list: &FixedSizeListArray = array.as_fixed_size_list();
+ let list: GenericListArray<OffsetSize> = fixed_size_list.clone().into();
+ Ok(Arc::new(list))
+}
+
/// Cast the container type of List/Largelist array but not the inner types.
/// This function can leave the value data intact and only has to cast the offset dtypes.
fn cast_list_container<OffsetSizeFrom, OffsetSizeTo>(
@@ -7847,6 +7883,71 @@ mod tests {
assert!(!c.is_valid(5)); // "2000-01-01"
}
+ #[test]
+ fn test_can_cast_types_fixed_size_list_to_list() {
+ // DataType::List
+ let array1 = Arc::new(make_fixed_size_list_array()) as ArrayRef;
+ assert!(can_cast_types(
+ array1.data_type(),
+ &DataType::List(Arc::new(Field::new("", DataType::Int32, false)))
+ ));
+
+ // DataType::LargeList
+ let array2 = Arc::new(make_fixed_size_list_array_for_large_list()) as ArrayRef;
+ assert!(can_cast_types(
+ array2.data_type(),
+ &DataType::LargeList(Arc::new(Field::new("", DataType::Int64, false)))
+ ));
+ }
+
+ #[test]
+ fn test_cast_fixed_size_list_to_list() {
+ // DataType::List
+ let array1 = Arc::new(make_fixed_size_list_array()) as ArrayRef;
+ let list_array1 = cast(
+ &array1,
+ &DataType::List(Arc::new(Field::new("", DataType::Int32, false))),
+ )
+ .unwrap();
+ let actual = list_array1.as_any().downcast_ref::<ListArray>().unwrap();
+ let expected = array1
+ .as_any()
+ .downcast_ref::<FixedSizeListArray>()
+ .unwrap();
+
+ assert_eq!(expected.values(), actual.values());
+ assert_eq!(expected.len(), actual.len());
+
+ // DataType::LargeList
+ let array2 = Arc::new(make_fixed_size_list_array_for_large_list()) as ArrayRef;
+ let list_array2 = cast(
+ &array2,
+ &DataType::LargeList(Arc::new(Field::new("", DataType::Int64, false))),
+ )
+ .unwrap();
+ let actual = list_array2
+ .as_any()
+ .downcast_ref::<LargeListArray>()
+ .unwrap();
+ let expected = array2
+ .as_any()
+ .downcast_ref::<FixedSizeListArray>()
+ .unwrap();
+ assert_eq!(expected.values(), actual.values());
+ assert_eq!(expected.len(), actual.len());
+
+ // Cast previous LargeList to List
+ let array3 = Arc::new(actual.clone()) as ArrayRef;
+ let list_array3 = cast(
+ &array3,
+ &DataType::List(Arc::new(Field::new("", DataType::Int64, false))),
+ )
+ .unwrap();
+ let actual = list_array3.as_any().downcast_ref::<ListArray>().unwrap();
+ let expected = array3.as_any().downcast_ref::<LargeListArray>().unwrap();
+ assert_eq!(expected.values(), actual.values());
+ }
+
#[test]
fn test_cast_list_containers() {
// large-list to list
@@ -7929,6 +8030,46 @@ mod tests {
LargeListArray::from(list_data)
}
+ fn make_fixed_size_list_array() -> FixedSizeListArray {
+ // Construct a value array
+ let value_data = ArrayData::builder(DataType::Int32)
+ .len(8)
+ .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
+ .build()
+ .unwrap();
+
+ let list_data_type = DataType::FixedSizeList(
+ Arc::new(Field::new("item", DataType::Int32, true)),
+ 4,
+ );
+ let list_data = ArrayData::builder(list_data_type)
+ .len(2)
+ .add_child_data(value_data)
+ .build()
+ .unwrap();
+ FixedSizeListArray::from(list_data)
+ }
+
+ fn make_fixed_size_list_array_for_large_list() -> FixedSizeListArray {
+ // Construct a value array
+ let value_data = ArrayData::builder(DataType::Int64)
+ .len(8)
+ .add_buffer(Buffer::from_slice_ref([0i64, 1, 2, 3, 4, 5, 6, 7]))
+ .build()
+ .unwrap();
+
+ let list_data_type = DataType::FixedSizeList(
+ Arc::new(Field::new("item", DataType::Int64, true)),
+ 4,
+ );
+ let list_data = ArrayData::builder(list_data_type)
+ .len(2)
+ .add_child_data(value_data)
+ .build()
+ .unwrap();
+ FixedSizeListArray::from(list_data)
+ }
+
#[test]
fn test_utf8_cast_offsets() {
// test if offset of the array is taken into account during cast