You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/23 07:49:08 UTC

[arrow-rs] branch master updated: Casting fixedsizelist to list/largelist (#4433)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a3b5b1933 Casting fixedsizelist to list/largelist (#4433)
a3b5b1933 is described below

commit a3b5b193380cc4fbafa823e5b0c998b5f8e5cae3
Author: Jay Zhan <ja...@gmail.com>
AuthorDate: Fri Jun 23 15:49:02 2023 +0800

    Casting fixedsizelist to list/largelist (#4433)
    
    * fixedsizelist to list/largelist
    
    Signed-off-by: jayzhan211 <ja...@gmail.com>
    
    * address comment
    
    Signed-off-by: jayzhan211 <ja...@gmail.com>
    
    * remove typehint
    
    Signed-off-by: jayzhan211 <ja...@gmail.com>
    
    * address ci
    
    Signed-off-by: jayzhan211 <ja...@gmail.com>
    
    * address clippy
    
    Signed-off-by: jayzhan211 <ja...@gmail.com>
    
    ---------
    
    Signed-off-by: jayzhan211 <ja...@gmail.com>
---
 arrow-array/src/cast.rs |  17 ++++++
 arrow-cast/src/cast.rs  | 141 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+)

diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs
index e92e19eb3..bee8823d1 100644
--- a/arrow-array/src/cast.rs
+++ b/arrow-array/src/cast.rs
@@ -799,6 +799,15 @@ pub trait AsArray: private::Sealed {
         self.as_list_opt().expect("list array")
     }
 
+    /// Downcast this to a [`FixedSizeListArray`] returning `None` if not possible
+    fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray>;
+
+    /// Downcast this to a [`FixedSizeListArray`] panicking if not possible
+    fn as_fixed_size_list(&self) -> &FixedSizeListArray {
+        self.as_fixed_size_list_opt()
+            .expect("fixed size list array")
+    }
+
     /// Downcast this to a [`MapArray`] returning `None` if not possible
     fn as_map_opt(&self) -> Option<&MapArray>;
 
@@ -839,6 +848,10 @@ impl AsArray for dyn Array + '_ {
         self.as_any().downcast_ref()
     }
 
+    fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray> {
+        self.as_any().downcast_ref()
+    }
+
     fn as_map_opt(&self) -> Option<&MapArray> {
         self.as_any().downcast_ref()
     }
@@ -872,6 +885,10 @@ impl AsArray for ArrayRef {
         self.as_ref().as_list_opt()
     }
 
+    fn as_fixed_size_list_opt(&self) -> Option<&FixedSizeListArray> {
+        self.as_ref().as_fixed_size_list_opt()
+    }
+
     fn as_map_opt(&self) -> Option<&MapArray> {
         self.as_any().downcast_ref()
     }
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index dea3f2acf..95c0a63a3 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -141,6 +141,12 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
             can_cast_types(list_from.data_type(), to_type)
         }
         (List(_), _) => false,
+        (FixedSizeList(list_from,_), List(list_to)) => {
+            list_from.data_type() == list_to.data_type()
+        }
+        (FixedSizeList(list_from,_), LargeList(list_to)) => {
+            list_from.data_type() == list_to.data_type()
+        }
         (_, List(list_to)) => can_cast_types(from_type, list_to.data_type()),
         (_, LargeList(list_to)) => can_cast_types(from_type, list_to.data_type()),
         // cast one decimal type to another decimal type
@@ -824,6 +830,25 @@ pub fn cast_with_options(
                 "Cannot cast list to non-list data types".to_string(),
             )),
         },
+        (FixedSizeList(list_from, _), List(list_to)) => {
+            if list_to.data_type() != list_from.data_type() {
+                Err(ArrowError::CastError(
+                    "cannot cast fixed-size-list to list with different child data".into(),
+                ))
+            } else {
+                cast_fixed_size_list_to_list::<i32>(array)
+            }
+        }
+        (FixedSizeList(list_from, _), LargeList(list_to)) => {
+            if list_to.data_type() != list_from.data_type() {
+                Err(ArrowError::CastError(
+                    "cannot cast fixed-size-list to largelist with different child data".into(),
+                ))
+            } else {
+                cast_fixed_size_list_to_list::<i64>(array)
+            }
+        }
+
         (_, List(ref to)) => {
             cast_primitive_to_list::<i32>(array, to, to_type, cast_options)
         }
@@ -3822,6 +3847,17 @@ where
     Ok(Arc::new(GenericByteArray::<TO>::from(array_data)))
 }
 
+fn cast_fixed_size_list_to_list<OffsetSize>(
+    array: &dyn Array,
+) -> Result<ArrayRef, ArrowError>
+where
+    OffsetSize: OffsetSizeTrait,
+{
+    let fixed_size_list: &FixedSizeListArray = array.as_fixed_size_list();
+    let list: GenericListArray<OffsetSize> = fixed_size_list.clone().into();
+    Ok(Arc::new(list))
+}
+
 /// Cast the container type of List/Largelist array but not the inner types.
 /// This function can leave the value data intact and only has to cast the offset dtypes.
 fn cast_list_container<OffsetSizeFrom, OffsetSizeTo>(
@@ -7847,6 +7883,71 @@ mod tests {
         assert!(!c.is_valid(5)); // "2000-01-01"
     }
 
+    #[test]
+    fn test_can_cast_types_fixed_size_list_to_list() {
+        // DataType::List
+        let array1 = Arc::new(make_fixed_size_list_array()) as ArrayRef;
+        assert!(can_cast_types(
+            array1.data_type(),
+            &DataType::List(Arc::new(Field::new("", DataType::Int32, false)))
+        ));
+
+        // DataType::LargeList
+        let array2 = Arc::new(make_fixed_size_list_array_for_large_list()) as ArrayRef;
+        assert!(can_cast_types(
+            array2.data_type(),
+            &DataType::LargeList(Arc::new(Field::new("", DataType::Int64, false)))
+        ));
+    }
+
+    #[test]
+    fn test_cast_fixed_size_list_to_list() {
+        // DataType::List
+        let array1 = Arc::new(make_fixed_size_list_array()) as ArrayRef;
+        let list_array1 = cast(
+            &array1,
+            &DataType::List(Arc::new(Field::new("", DataType::Int32, false))),
+        )
+        .unwrap();
+        let actual = list_array1.as_any().downcast_ref::<ListArray>().unwrap();
+        let expected = array1
+            .as_any()
+            .downcast_ref::<FixedSizeListArray>()
+            .unwrap();
+
+        assert_eq!(expected.values(), actual.values());
+        assert_eq!(expected.len(), actual.len());
+
+        // DataType::LargeList
+        let array2 = Arc::new(make_fixed_size_list_array_for_large_list()) as ArrayRef;
+        let list_array2 = cast(
+            &array2,
+            &DataType::LargeList(Arc::new(Field::new("", DataType::Int64, false))),
+        )
+        .unwrap();
+        let actual = list_array2
+            .as_any()
+            .downcast_ref::<LargeListArray>()
+            .unwrap();
+        let expected = array2
+            .as_any()
+            .downcast_ref::<FixedSizeListArray>()
+            .unwrap();
+        assert_eq!(expected.values(), actual.values());
+        assert_eq!(expected.len(), actual.len());
+
+        // Cast previous LargeList to List
+        let array3 = Arc::new(actual.clone()) as ArrayRef;
+        let list_array3 = cast(
+            &array3,
+            &DataType::List(Arc::new(Field::new("", DataType::Int64, false))),
+        )
+        .unwrap();
+        let actual = list_array3.as_any().downcast_ref::<ListArray>().unwrap();
+        let expected = array3.as_any().downcast_ref::<LargeListArray>().unwrap();
+        assert_eq!(expected.values(), actual.values());
+    }
+
     #[test]
     fn test_cast_list_containers() {
         // large-list to list
@@ -7929,6 +8030,46 @@ mod tests {
         LargeListArray::from(list_data)
     }
 
+    fn make_fixed_size_list_array() -> FixedSizeListArray {
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int32)
+            .len(8)
+            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
+            .build()
+            .unwrap();
+
+        let list_data_type = DataType::FixedSizeList(
+            Arc::new(Field::new("item", DataType::Int32, true)),
+            4,
+        );
+        let list_data = ArrayData::builder(list_data_type)
+            .len(2)
+            .add_child_data(value_data)
+            .build()
+            .unwrap();
+        FixedSizeListArray::from(list_data)
+    }
+
+    fn make_fixed_size_list_array_for_large_list() -> FixedSizeListArray {
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int64)
+            .len(8)
+            .add_buffer(Buffer::from_slice_ref([0i64, 1, 2, 3, 4, 5, 6, 7]))
+            .build()
+            .unwrap();
+
+        let list_data_type = DataType::FixedSizeList(
+            Arc::new(Field::new("item", DataType::Int64, true)),
+            4,
+        );
+        let list_data = ArrayData::builder(list_data_type)
+            .len(2)
+            .add_child_data(value_data)
+            .build()
+            .unwrap();
+        FixedSizeListArray::from(list_data)
+    }
+
     #[test]
     fn test_utf8_cast_offsets() {
         // test if offset of the array is taken into account during cast