You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/02/17 13:11:18 UTC

[arrow-datafusion] branch master updated: Use`eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` kernels from arrow (#1475)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d68b6d  Use`eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` kernels from arrow (#1475)
4d68b6d is described below

commit 4d68b6d37c486e94c2fb9a30a297849ef5eb6931
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Thu Feb 17 08:11:14 2022 -0500

    Use`eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` kernels from arrow (#1475)
    
    * Use Dynamic Dispatch kernels in Arrow `eq_dyn`
    
    * wrap eq_dyn
    
    * Rework how kernels are wrapped
    
    * touchups
    
    * update comment
---
 datafusion/src/physical_plan/expressions/binary.rs | 72 +++++++++++++++++-----
 1 file changed, 57 insertions(+), 15 deletions(-)

diff --git a/datafusion/src/physical_plan/expressions/binary.rs b/datafusion/src/physical_plan/expressions/binary.rs
index 9f007a2..6f9084a 100644
--- a/datafusion/src/physical_plan/expressions/binary.rs
+++ b/datafusion/src/physical_plan/expressions/binary.rs
@@ -25,10 +25,9 @@ use arrow::compute::kernels::arithmetic::{
     multiply_scalar, subtract, subtract_scalar,
 };
 use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene};
-use arrow::compute::kernels::comparison::{eq, gt, gt_eq, lt, lt_eq, neq};
 use arrow::compute::kernels::comparison::{
-    eq_bool, eq_bool_scalar, gt_bool, gt_bool_scalar, gt_eq_bool, gt_eq_bool_scalar,
-    lt_bool, lt_bool_scalar, lt_eq_bool, lt_eq_bool_scalar, neq_bool, neq_bool_scalar,
+    eq_bool_scalar, gt_bool_scalar, gt_eq_bool_scalar, lt_bool_scalar, lt_eq_bool_scalar,
+    neq_bool_scalar,
 };
 use arrow::compute::kernels::comparison::{
     eq_dyn_bool_scalar, gt_dyn_bool_scalar, gt_eq_dyn_bool_scalar, lt_dyn_bool_scalar,
@@ -46,14 +45,11 @@ use arrow::compute::kernels::comparison::{
     eq_scalar, gt_eq_scalar, gt_scalar, lt_eq_scalar, lt_scalar, neq_scalar,
 };
 use arrow::compute::kernels::comparison::{
-    eq_utf8, gt_eq_utf8, gt_utf8, like_utf8, lt_eq_utf8, lt_utf8, neq_utf8, nlike_utf8,
-    regexp_is_match_utf8,
-};
-use arrow::compute::kernels::comparison::{
     eq_utf8_scalar, gt_eq_utf8_scalar, gt_utf8_scalar, like_utf8_scalar,
     lt_eq_utf8_scalar, lt_utf8_scalar, neq_utf8_scalar, nlike_utf8_scalar,
     regexp_is_match_utf8_scalar,
 };
+use arrow::compute::kernels::comparison::{like_utf8, nlike_utf8, regexp_is_match_utf8};
 use arrow::datatypes::{ArrowNumericType, DataType, Schema, TimeUnit};
 use arrow::error::ArrowError::DivideByZero;
 use arrow::record_batch::RecordBatch;
@@ -65,6 +61,50 @@ use crate::physical_plan::{ColumnarValue, PhysicalExpr};
 use crate::scalar::ScalarValue;
 use datafusion_expr::Operator;
 
+// TODO move to arrow_rs
+// https://github.com/apache/arrow-rs/issues/1312
+fn as_decimal_array(arr: &dyn Array) -> &DecimalArray {
+    arr.as_any()
+        .downcast_ref::<DecimalArray>()
+        .expect("Unable to downcast to typed array to DecimalArray")
+}
+
+/// create a `dyn_op` wrapper function for the specified operation
+/// that call the underlying dyn_op arrow kernel if the type is
+/// supported, and translates ArrowError to DataFusionError
+macro_rules! make_dyn_comp_op {
+    ($OP:tt) => {
+        paste::paste! {
+            /// wrapper over arrow compute kernel that maps Error types and
+            /// patches missing support in arrow
+            fn [<$OP _dyn>] (left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
+                match (left.data_type(), right.data_type()) {
+                    // Call `op_decimal` (e.g. `eq_decimal) until
+                    // arrow has native support
+                    // https://github.com/apache/arrow-rs/issues/1200
+                    (DataType::Decimal(_, _), DataType::Decimal(_, _)) => {
+                        [<$OP _decimal>](as_decimal_array(left), as_decimal_array(right))
+                    },
+                    // By default call the arrow kernel
+                    _ => {
+                    arrow::compute::kernels::comparison::[<$OP _dyn>](left, right)
+                            .map_err(|e| e.into())
+                    }
+                }
+                .map(|a| Arc::new(a) as ArrayRef)
+            }
+        }
+    };
+}
+
+// create eq_dyn, gt_dyn, wrappers etc
+make_dyn_comp_op!(eq);
+make_dyn_comp_op!(gt);
+make_dyn_comp_op!(gt_eq);
+make_dyn_comp_op!(lt);
+make_dyn_comp_op!(lt_eq);
+make_dyn_comp_op!(neq);
+
 // Simple (low performance) kernels until optimized kernels are added to arrow
 // See https://github.com/apache/arrow-rs/issues/960
 
@@ -91,8 +131,10 @@ fn is_not_distinct_from_bool(
         .collect())
 }
 
-// TODO add iter for decimal array
-// TODO move this to arrow-rs
+// TODO move decimal kernels to to arrow-rs
+// https://github.com/apache/arrow-rs/issues/1200
+
+// TODO use iter added for for decimal array in
 // https://github.com/apache/arrow-rs/issues/1083
 pub(super) fn eq_decimal_scalar(
     left: &DecimalArray,
@@ -1194,12 +1236,12 @@ impl BinaryExpr {
         match &self.op {
             Operator::Like => binary_string_array_op!(left, right, like),
             Operator::NotLike => binary_string_array_op!(left, right, nlike),
-            Operator::Lt => binary_array_op!(left, right, lt),
-            Operator::LtEq => binary_array_op!(left, right, lt_eq),
-            Operator::Gt => binary_array_op!(left, right, gt),
-            Operator::GtEq => binary_array_op!(left, right, gt_eq),
-            Operator::Eq => binary_array_op!(left, right, eq),
-            Operator::NotEq => binary_array_op!(left, right, neq),
+            Operator::Lt => lt_dyn(&left, &right),
+            Operator::LtEq => lt_eq_dyn(&left, &right),
+            Operator::Gt => gt_dyn(&left, &right),
+            Operator::GtEq => gt_eq_dyn(&left, &right),
+            Operator::Eq => eq_dyn(&left, &right),
+            Operator::NotEq => neq_dyn(&left, &right),
             Operator::IsDistinctFrom => binary_array_op!(left, right, is_distinct_from),
             Operator::IsNotDistinctFrom => {
                 binary_array_op!(left, right, is_not_distinct_from)