You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by yj...@apache.org on 2022/04/18 05:50:30 UTC

[arrow-rs] branch master updated: Support casting to/from `DataType::Null` in `cast` kernel (#1572)

This is an automated email from the ASF dual-hosted git repository.

yjshen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 8bed7ea9d Support casting to/from `DataType::Null` in `cast` kernel (#1572)
8bed7ea9d is described below

commit 8bed7ea9d7c7f16cf9987a7150b4b4ec614ae723
Author: DuRipeng <45...@qq.com>
AuthorDate: Mon Apr 18 13:50:24 2022 +0800

    Support casting to/from `DataType::Null` in `cast` kernel (#1572)
    
    * cast null from and to others
    
    * fmt fix
    
    * add more ut
    
    Co-authored-by: duripeng <du...@baidu.com>
---
 arrow/src/compute/kernels/cast.rs | 130 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 129 insertions(+), 1 deletion(-)

diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index 6fb2a4e90..50786f3d9 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -91,7 +91,20 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
             | UInt64
             | Float64
             | Date64
+            | Timestamp(_, _)
+            | Time64(_)
+            | Duration(_)
+            | Interval(_)
+            | FixedSizeBinary(_)
+            | Binary
+            | Utf8
+            | LargeBinary
+            | LargeUtf8
             | List(_)
+            | LargeList(_)
+            | FixedSizeList(_, _)
+            | Struct(_)
+            | Map(_, _)
             | Dictionary(_, _),
         )
         | (
@@ -109,7 +122,20 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
             | UInt64
             | Float64
             | Date64
+            | Timestamp(_, _)
+            | Time64(_)
+            | Duration(_)
+            | Interval(_)
+            | FixedSizeBinary(_)
+            | Binary
+            | Utf8
+            | LargeBinary
+            | LargeUtf8
             | List(_)
+            | LargeList(_)
+            | FixedSizeList(_, _)
+            | Struct(_)
+            | Map(_, _)
             | Dictionary(_, _),
             Null,
         ) => true,
@@ -469,7 +495,20 @@ pub fn cast_with_options(
             | UInt64
             | Float64
             | Date64
+            | Timestamp(_, _)
+            | Time64(_)
+            | Duration(_)
+            | Interval(_)
+            | FixedSizeBinary(_)
+            | Binary
+            | Utf8
+            | LargeBinary
+            | LargeUtf8
             | List(_)
+            | LargeList(_)
+            | FixedSizeList(_, _)
+            | Struct(_)
+            | Map(_, _)
             | Dictionary(_, _),
         )
         | (
@@ -487,7 +526,20 @@ pub fn cast_with_options(
             | UInt64
             | Float64
             | Date64
+            | Timestamp(_, _)
+            | Time64(_)
+            | Duration(_)
+            | Interval(_)
+            | FixedSizeBinary(_)
+            | Binary
+            | Utf8
+            | LargeBinary
+            | LargeUtf8
             | List(_)
+            | LargeList(_)
+            | FixedSizeList(_, _)
+            | Struct(_)
+            | Map(_, _)
             | Dictionary(_, _),
             Null,
         ) => Ok(new_null_array(to_type, array.len())),
@@ -4209,7 +4261,7 @@ mod tests {
     }
 
     #[test]
-    fn test_cast_null_array_from_and_to_others() {
+    fn test_cast_null_array_from_and_to_primitive_array() {
         macro_rules! typed_test {
             ($ARR_TYPE:ident, $DATATYPE:ident, $TYPE:tt) => {{
                 {
@@ -4242,6 +4294,82 @@ mod tests {
 
         typed_test!(Float32Array, Float32, Float32Type);
         typed_test!(Float64Array, Float64, Float64Type);
+
+        typed_test!(Date32Array, Date32, Date32Type);
+        typed_test!(Date64Array, Date64, Date64Type);
+    }
+
+    fn cast_from_and_to_null(data_type: &DataType) {
+        // Step 1: `data_type` -> Null. Result must be Null-typed with all 4 slots null.
+        {
+            let array = new_null_array(data_type, 4);
+            assert_eq!(array.data_type(), data_type);
+            let cast_array = cast(&array, &DataType::Null).expect("cast failed");
+            assert_eq!(cast_array.data_type(), &DataType::Null);
+            for i in 0..4 {
+                assert!(cast_array.is_null(i));
+            }
+        }
+        // Step 2: Null -> `data_type`. Result must be `data_type`-typed, all 4 slots null.
+        {
+            let array = new_null_array(&DataType::Null, 4);
+            assert_eq!(array.data_type(), &DataType::Null);
+            let cast_array = cast(&array, data_type).expect("cast failed");
+            assert_eq!(cast_array.data_type(), data_type);
+            for i in 0..4 {
+                assert!(cast_array.is_null(i));
+            }
+        }
+    }
+
+    #[test]
+    fn test_cast_null_from_and_to_variable_sized() { // Utf8/Binary and Large* variants
+        cast_from_and_to_null(&DataType::Utf8);
+        cast_from_and_to_null(&DataType::LargeUtf8);
+        cast_from_and_to_null(&DataType::Binary);
+        cast_from_and_to_null(&DataType::LargeBinary);
+    }
+
+    #[test]
+    fn test_cast_null_from_and_to_nested_type() {
+        // Map whose "entry" struct holds a Utf8 `key` and an Int32 `value`
+        let data_type = DataType::Map(
+            Box::new(Field::new(
+                "entry",
+                DataType::Struct(vec![
+                    Field::new("key", DataType::Utf8, false),
+                    Field::new("value", DataType::Int32, true),
+                ]),
+                false,
+            )),
+            false,
+        );
+        cast_from_and_to_null(&data_type);
+
+        // List, LargeList and FixedSizeList(_, 4), each over an Int32 `item` field
+        let data_type =
+            DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
+        cast_from_and_to_null(&data_type);
+        let data_type =
+            DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
+        cast_from_and_to_null(&data_type);
+        let data_type = DataType::FixedSizeList(
+            Box::new(Field::new("item", DataType::Int32, true)),
+            4,
+        );
+        cast_from_and_to_null(&data_type);
+
+        // Dictionary: reuse the type of a DictionaryArray<Int8Type> built from four `None`s
+        let values = vec![None, None, None, None] as Vec<Option<&str>>;
+        let array: DictionaryArray<Int8Type> = values.into_iter().collect();
+        let array = Arc::new(array) as ArrayRef;
+        let data_type = array.data_type().to_owned();
+        cast_from_and_to_null(&data_type);
+
+        // Struct with a single Int64 `data` field
+        let data_type =
+            DataType::Struct(vec![Field::new("data", DataType::Int64, false)]);
+        cast_from_and_to_null(&data_type);
     }
 
     /// Print the `DictionaryArray` `array` as a vector of strings