You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/28 00:47:46 UTC

[GitHub] [arrow-rs] alamb commented on a diff in pull request #1951: Add add_dyn for DictionaryArray support

alamb commented on code in PR #1951:
URL: https://github.com/apache/arrow-rs/pull/1951#discussion_r907917971


##########
arrow/src/compute/kernels/arithmetic.rs:
##########
@@ -423,6 +429,247 @@ where
     Ok(PrimitiveArray::<T>::from(data))
 }
 
+/// Applies $OP to $LEFT and $RIGHT which are two dictionaries which have (the same) key type $KT
+macro_rules! typed_dict_op {
+    ($LEFT: expr, $RIGHT: expr, $OP: expr, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Int8, DataType::Int8) => {
+                let array = math_op_dict::<$KT, Int8Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Int16, DataType::Int16) => {
+                let array = math_op_dict::<$KT, Int16Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Int32, DataType::Int32) => {
+                let array = math_op_dict::<$KT, Int32Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Int64, DataType::Int64) => {
+                let array = math_op_dict::<$KT, Int64Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::UInt8, DataType::UInt8) => {
+                let array = math_op_dict::<$KT, UInt8Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::UInt16, DataType::UInt16) => {
+                let array = math_op_dict::<$KT, UInt16Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::UInt32, DataType::UInt32) => {
+                let array = math_op_dict::<$KT, UInt32Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::UInt64, DataType::UInt64) => {
+                let array = math_op_dict::<$KT, UInt64Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Float32, DataType::Float32) => {
+                let array = math_op_dict::<$KT, Float32Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Float64, DataType::Float64) => {
+                let array = math_op_dict::<$KT, Float64Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot perform arithmetic operation on two dictionary arrays of different value types ({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_dict_math_op {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are `DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP: expr) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), DataType::Dictionary(right_key_type, _))=> {
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, Int8Type)
+                    }
+                    (DataType::Int16, DataType::Int16) => {
+                        let left = as_dictionary_array::<Int16Type>($LEFT);
+                        let right = as_dictionary_array::<Int16Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, Int16Type)
+                    }
+                    (DataType::Int32, DataType::Int32) => {
+                        let left = as_dictionary_array::<Int32Type>($LEFT);
+                        let right = as_dictionary_array::<Int32Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, Int32Type)
+                    }
+                    (DataType::Int64, DataType::Int64) => {
+                        let left = as_dictionary_array::<Int64Type>($LEFT);
+                        let right = as_dictionary_array::<Int64Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, Int64Type)
+                    }
+                    (DataType::UInt8, DataType::UInt8) => {
+                        let left = as_dictionary_array::<UInt8Type>($LEFT);
+                        let right = as_dictionary_array::<UInt8Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, UInt8Type)
+                    }
+                    (DataType::UInt16, DataType::UInt16) => {
+                        let left = as_dictionary_array::<UInt16Type>($LEFT);
+                        let right = as_dictionary_array::<UInt16Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, UInt16Type)
+                    }
+                    (DataType::UInt32, DataType::UInt32) => {
+                        let left = as_dictionary_array::<UInt32Type>($LEFT);
+                        let right = as_dictionary_array::<UInt32Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, UInt32Type)
+                    }
+                    (DataType::UInt64, DataType::UInt64) => {
+                        let left = as_dictionary_array::<UInt64Type>($LEFT);
+                        let right = as_dictionary_array::<UInt64Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, UInt64Type)
+                    }
+                    (t1, t2) => Err(ArrowError::CastError(format!(
+                        "Cannot perform arithmetic operation on two dictionary arrays of different key types ({} and {})",
+                        t1, t2
+                    ))),
+                }
+            }
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot perform arithmetic operation on dictionary array with non-dictionary array ({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+macro_rules! typed_op {
+    ($LEFT: expr, $RIGHT: expr, $T: ident, $OP: expr) => {{
+        let left = $LEFT
+            .as_any()
+            .downcast_ref::<PrimitiveArray<$T>>()
+            .ok_or_else(|| {
+                ArrowError::CastError(format!(
+                    "Left array cannot be cast to {}",
+                    type_name::<$T>()
+                ))
+            })?;
+        let right = $RIGHT
+            .as_any()
+            .downcast_ref::<PrimitiveArray<$T>>()
+            .ok_or_else(|| {
+                ArrowError::CastError(format!(
+                    "Right array cannot be cast to {}",
+                    type_name::<$T>(),
+                ))
+            })?;
+        let array = math_op(left, right, $OP)?;
+        Ok(Arc::new(array))
+    }};
+}
+
+macro_rules! typed_math_op {
+    ($LEFT: expr, $RIGHT: expr, $OP: expr) => {{
+        match $LEFT.data_type() {
+            DataType::Int8 => {
+                typed_op!($LEFT, $RIGHT, Int8Type, $OP)
+            }
+            DataType::Int16 => {
+                typed_op!($LEFT, $RIGHT, Int16Type, $OP)
+            }
+            DataType::Int32 => {
+                typed_op!($LEFT, $RIGHT, Int32Type, $OP)
+            }
+            DataType::Int64 => {
+                typed_op!($LEFT, $RIGHT, Int64Type, $OP)
+            }
+            DataType::UInt8 => {
+                typed_op!($LEFT, $RIGHT, UInt8Type, $OP)
+            }
+            DataType::UInt16 => {
+                typed_op!($LEFT, $RIGHT, UInt16Type, $OP)
+            }
+            DataType::UInt32 => {
+                typed_op!($LEFT, $RIGHT, UInt32Type, $OP)
+            }
+            DataType::UInt64 => {
+                typed_op!($LEFT, $RIGHT, UInt64Type, $OP)
+            }
+            DataType::Float32 => {
+                typed_op!($LEFT, $RIGHT, Float32Type, $OP)
+            }
+            DataType::Float64 => {
+                typed_op!($LEFT, $RIGHT, Float64Type, $OP)
+            }
+            t => Err(ArrowError::CastError(format!(
+                "Cannot perform arithmetic operation on arrays of type {}",
+                t
+            ))),
+        }
+    }};
+}
+
+/// Helper function to perform boolean lambda function on values from two dictionary arrays, this
+/// version does not attempt to use SIMD explicitly (though the compiler may auto vectorize)
+macro_rules! math_dict_op {
+    ($left: expr, $right:expr, $op:expr, $value_ty:ty) => {{
+        if $left.len() != $right.len() {
+            return Err(ArrowError::ComputeError(format!(
+                "Cannot perform operation on arrays of different length ({}, {})",
+                $left.len(),
+                $right.len()
+            )));
+        }
+
+        // Safety justification: Since the inputs are valid Arrow arrays, all values are
+        // valid indexes into the dictionary (which is verified during construction)
+
+        let left_iter = unsafe {
+            $left
+                .values()
+                .as_any()
+                .downcast_ref::<$value_ty>()
+                .unwrap()
+                .take_iter_unchecked($left.keys_iter())
+        };
+
+        let right_iter = unsafe {
+            $right
+                .values()
+                .as_any()
+                .downcast_ref::<$value_ty>()
+                .unwrap()
+                .take_iter_unchecked($right.keys_iter())
+        };
+
+        let result = left_iter

Review Comment:
   this is very nice 👌 



##########
arrow/src/compute/kernels/arithmetic.rs:
##########
@@ -634,6 +892,50 @@ mod tests {
         assert_eq!(17, c.value(4));
     }
 
+    #[test]
+    fn test_primitive_array_add_dyn() {
+        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
+        let b = Int32Array::from(vec![6, 7, 8, 9, 8]);
+        let c = add_dyn(&a, &b).unwrap();
+        let c = c.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(11, c.value(0));
+        assert_eq!(13, c.value(1));
+        assert_eq!(15, c.value(2));
+        assert_eq!(17, c.value(3));
+        assert_eq!(17, c.value(4));
+    }
+
+    #[test]
+    fn test_primitive_array_add_dyn_dict() {
+        let key_builder = PrimitiveBuilder::<Int8Type>::new(3);
+        let value_builder = PrimitiveBuilder::<Int32Type>::new(2);
+        let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder);

Review Comment:
   I think it would be good to have at least one `null` value (via `append_null`) to make sure that works correctly



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org