You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/27 21:35:09 UTC

[GitHub] [arrow-rs] martin-g commented on a diff in pull request #1951: Add add_dyn for DictionaryArray support

martin-g commented on code in PR #1951:
URL: https://github.com/apache/arrow-rs/pull/1951#discussion_r907817838


##########
arrow/src/compute/kernels/arithmetic.rs:
##########
@@ -423,6 +429,245 @@ where
     Ok(PrimitiveArray::<T>::from(data))
 }
 
+/// Applies $OP to $LEFT and $RIGHT which are two dictionaries which have (the same) key type $KT
+///
+/// Dispatches on the pair `($LEFT.value_type(), $RIGHT.value_type())`. When both sides
+/// report the same numeric value type (`Int8`..`Int64`, `UInt8`..`UInt64`, `Float32`,
+/// `Float64`), the macro calls `math_op_dict::<$KT, V, _>($LEFT, $RIGHT, $OP)` with the
+/// matching value type `V` and evaluates to `Ok(Arc::new(array))`.
+///
+/// Any other pair of value types evaluates to `Err(ArrowError::CastError(..))`.
+///
+/// The expansion uses `?`, so it must be expanded inside a function whose return type
+/// can absorb the error produced by `math_op_dict`.
+macro_rules! typed_dict_op {
+    ($LEFT: expr, $RIGHT: expr, $OP: expr, $KT: tt) => {{
+        match ($LEFT.value_type(), $RIGHT.value_type()) {
+            (DataType::Int8, DataType::Int8) => {
+                let array = math_op_dict::<$KT, Int8Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Int16, DataType::Int16) => {
+                let array = math_op_dict::<$KT, Int16Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Int32, DataType::Int32) => {
+                let array = math_op_dict::<$KT, Int32Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Int64, DataType::Int64) => {
+                let array = math_op_dict::<$KT, Int64Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::UInt8, DataType::UInt8) => {
+                let array = math_op_dict::<$KT, UInt8Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::UInt16, DataType::UInt16) => {
+                let array = math_op_dict::<$KT, UInt16Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::UInt32, DataType::UInt32) => {
+                let array = math_op_dict::<$KT, UInt32Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::UInt64, DataType::UInt64) => {
+                let array = math_op_dict::<$KT, UInt64Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Float32, DataType::Float32) => {
+                let array = math_op_dict::<$KT, Float32Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            (DataType::Float64, DataType::Float64) => {
+                let array = math_op_dict::<$KT, Float64Type, _>($LEFT, $RIGHT, $OP)?;
+                Ok(Arc::new(array))
+            }
+            // NOTE(review): this catch-all is also reached when both value types are
+            // identical but not listed above, even though the message only mentions
+            // "different value types".
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot perform arithmetic operation on two dictionary arrays of different value types ({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+/// Applies `$OP` to `$LEFT` and `$RIGHT` when both are dictionary arrays.
+///
+/// The macro first matches on `($LEFT.data_type(), $RIGHT.data_type())`. When both are
+/// `DataType::Dictionary`, it matches on the two key types; for each pair of identical
+/// integer key types (`Int8`..`Int64`, `UInt8`..`UInt64`) it downcasts both inputs with
+/// `as_dictionary_array::<K>` and delegates to `typed_dict_op!`.
+///
+/// Evaluates to `Err(ArrowError::CastError(..))` when the key types are not one of the
+/// listed identical pairs, or when the inputs are not both dictionary arrays.
+macro_rules! typed_dict_math_op {
+   // Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are `DictionaryArray`
+    ($LEFT: expr, $RIGHT: expr, $OP: expr) => {{
+        match ($LEFT.data_type(), $RIGHT.data_type()) {
+            (DataType::Dictionary(left_key_type, _), DataType::Dictionary(right_key_type, _))=> {
+                // Both sides are dictionaries: pick the concrete key type to downcast to.
+                match (left_key_type.as_ref(), right_key_type.as_ref()) {
+                    (DataType::Int8, DataType::Int8) => {
+                        let left = as_dictionary_array::<Int8Type>($LEFT);
+                        let right = as_dictionary_array::<Int8Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, Int8Type)
+                    }
+                    (DataType::Int16, DataType::Int16) => {
+                        let left = as_dictionary_array::<Int16Type>($LEFT);
+                        let right = as_dictionary_array::<Int16Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, Int16Type)
+                    }
+                    (DataType::Int32, DataType::Int32) => {
+                        let left = as_dictionary_array::<Int32Type>($LEFT);
+                        let right = as_dictionary_array::<Int32Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, Int32Type)
+                    }
+                    (DataType::Int64, DataType::Int64) => {
+                        let left = as_dictionary_array::<Int64Type>($LEFT);
+                        let right = as_dictionary_array::<Int64Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, Int64Type)
+                    }
+                    (DataType::UInt8, DataType::UInt8) => {
+                        let left = as_dictionary_array::<UInt8Type>($LEFT);
+                        let right = as_dictionary_array::<UInt8Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, UInt8Type)
+                    }
+                    (DataType::UInt16, DataType::UInt16) => {
+                        let left = as_dictionary_array::<UInt16Type>($LEFT);
+                        let right = as_dictionary_array::<UInt16Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, UInt16Type)
+                    }
+                    (DataType::UInt32, DataType::UInt32) => {
+                        let left = as_dictionary_array::<UInt32Type>($LEFT);
+                        let right = as_dictionary_array::<UInt32Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, UInt32Type)
+                    }
+                    (DataType::UInt64, DataType::UInt64) => {
+                        let left = as_dictionary_array::<UInt64Type>($LEFT);
+                        let right = as_dictionary_array::<UInt64Type>($RIGHT);
+                        typed_dict_op!(left, right, $OP, UInt64Type)
+                    }
+                    // Reached for any key-type pair not listed above.
+                    (t1, t2) => Err(ArrowError::CastError(format!(
+                        "Cannot perform arithmetic operation on two dictionary arrays of different key types ({} and {})",
+                        t1, t2
+                    ))),
+                }
+            }
+            // NOTE(review): reached whenever at least one side is not a dictionary,
+            // which includes the case where neither side is one.
+            (t1, t2) => Err(ArrowError::CastError(format!(
+                "Cannot perform arithmetic operation on dictionary array with non-dictionary array ({} and {})",
+                t1, t2
+            ))),
+        }
+    }};
+}
+
+/// Downcasts `$LEFT` and `$RIGHT` to `PrimitiveArray<$T>` and applies `$OP` via `math_op`.
+///
+/// Each input is downcast through `as_any().downcast_ref::<PrimitiveArray<$T>>()`. If a
+/// downcast fails, the expansion returns early (through `?`) with an
+/// `ArrowError::CastError` naming the side that failed and `type_name::<$T>()`.
+/// On success it evaluates to `Ok(Arc::new(array))`, where `array` is the result of
+/// `math_op(left, right, $OP)?`.
+///
+/// Because the expansion uses `?`, it must be expanded inside a function whose return
+/// type can absorb these errors.
+macro_rules! typed_op {
+    ($LEFT: expr, $RIGHT: expr, $T: ident, $OP: expr) => {{
+        let left = $LEFT
+            .as_any()
+            .downcast_ref::<PrimitiveArray<$T>>()
+            .ok_or_else(|| {
+                ArrowError::CastError(format!(
+                    "Left array cannot be cast to {}",
+                    type_name::<$T>()
+                ))
+            })?;
+        let right = $RIGHT
+            .as_any()
+            .downcast_ref::<PrimitiveArray<$T>>()
+            .ok_or_else(|| {
+                ArrowError::CastError(format!(
+                    "Right array cannot be cast to {}",
+                    type_name::<$T>(),
+                ))
+            })?;
+        let array = math_op(left, right, $OP)?;
+        Ok(Arc::new(array))
+    }};
+}
+
+/// Applies `$OP` to `$LEFT` and `$RIGHT` by dispatching on the data type of `$LEFT`.
+///
+/// For each listed numeric type (`Int8`..`Int64`, `UInt8`..`UInt64`, `Float32`,
+/// `Float64`) the macro expands to `typed_op!` with the matching primitive type.
+/// Any other left-hand data type evaluates to `Err(ArrowError::CastError(..))`.
+///
+/// Only `$LEFT.data_type()` is inspected here; `$RIGHT` is checked by the downcast
+/// inside `typed_op!`, so a right-hand array of a different type is reported by that
+/// macro's "Right array cannot be cast" error.
+macro_rules! typed_math_op {
+    ($LEFT: expr, $RIGHT: expr, $OP: expr) => {{
+        match $LEFT.data_type() {
+            DataType::Int8 => {
+                typed_op!($LEFT, $RIGHT, Int8Type, $OP)
+            }
+            DataType::Int16 => {
+                typed_op!($LEFT, $RIGHT, Int16Type, $OP)
+            }
+            DataType::Int32 => {
+                typed_op!($LEFT, $RIGHT, Int32Type, $OP)
+            }
+            DataType::Int64 => {
+                typed_op!($LEFT, $RIGHT, Int64Type, $OP)
+            }
+            DataType::UInt8 => {
+                typed_op!($LEFT, $RIGHT, UInt8Type, $OP)
+            }
+            DataType::UInt16 => {
+                typed_op!($LEFT, $RIGHT, UInt16Type, $OP)
+            }
+            DataType::UInt32 => {
+                typed_op!($LEFT, $RIGHT, UInt32Type, $OP)
+            }
+            DataType::UInt64 => {
+                typed_op!($LEFT, $RIGHT, UInt64Type, $OP)
+            }
+            DataType::Float32 => {
+                typed_op!($LEFT, $RIGHT, Float32Type, $OP)
+            }
+            DataType::Float64 => {
+                typed_op!($LEFT, $RIGHT, Float64Type, $OP)
+            }
+            // Left-hand type is not one of the numeric primitives handled above.
+            t => Err(ArrowError::CastError(format!(
+                "Cannot perform arithmetic operation on arrays of type {}",
+                t
+            ))),
+        }
+    }};
+}
+
+/// Helper function to perform boolean lambda function on values from two dictionary arrays, this
+/// version does not attempt to use SIMD explicitly (though the compiler may auto vectorize)
+macro_rules! math_dict_op {
+    ($left: expr, $right:expr, $op:expr, $value_ty:ty) => {{
+        if $left.len() != $right.len() {
+            return Err(ArrowError::ComputeError(
+                "Cannot perform operation on arrays of different length".to_string(),

Review Comment:
   I think it would be useful to include the lengths of both arrays in the error message.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org