You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/01/14 17:54:04 UTC

[GitHub] [arrow-datafusion] alamb commented on a change in pull request #1483: support comparison for decimal data type and refactor the binary coercion rule

alamb commented on a change in pull request #1483:
URL: https://github.com/apache/arrow-datafusion/pull/1483#discussion_r785024926



##########
File path: datafusion/src/physical_plan/coercion_rule/binary_rule.rs
##########
@@ -0,0 +1,229 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Support the coercion rule for binary operation
+
+use crate::arrow::datatypes::DataType;
+use crate::error::{DataFusionError, Result};
+use crate::logical_plan::Operator;
+use crate::physical_plan::expressions::coercion::{
+    dictionary_coercion, eq_coercion, is_numeric, like_coercion, numerical_coercion,
+    string_coercion, temporal_coercion,
+};
+
+/// Coercion rules for all binary operators. Returns the output type
+/// of applying `op` to an argument of `lhs_type` and `rhs_type`.
+pub(crate) fn coerce_types(
+    lhs_type: &DataType,
+    op: &Operator,
+    rhs_type: &DataType,
+) -> Result<DataType> {
+    // This result MUST be compatible with `binary_coerce`
+    let result = match op {
+        Operator::And | Operator::Or => match (lhs_type, rhs_type) {
+            // logical binary boolean operators can only be evaluated in bools
+            (DataType::Boolean, DataType::Boolean) => Some(DataType::Boolean),
+            _ => None,
+        },
+        // logical equality operators have their own rules, and always return a boolean
+        Operator::Eq | Operator::NotEq => comparison_eq_coercion(lhs_type, rhs_type),
+        // order-comparison operators have their own rules
+        Operator::Lt | Operator::Gt | Operator::GtEq | Operator::LtEq => {
+            comparison_order_coercion(lhs_type, rhs_type)
+        }
+        // "like" operators operate on strings and always return a boolean
+        Operator::Like | Operator::NotLike => like_coercion(lhs_type, rhs_type),
+        // for math expressions, the final value of the coercion is also the return type
+        // because coercion favours higher information types
+        // TODO: support decimal data type

Review comment:
       I think this TODO is now complete

##########
File path: datafusion/src/physical_plan/expressions/binary.rs
##########
@@ -79,6 +76,193 @@ fn is_not_distinct_from_bool(
         .collect())
 }
 
+// TODO add iter for decimal array
+// TODO move this to arrow-rs

Review comment:
       👍 

##########
File path: datafusion/src/physical_plan/expressions/coercion.rs
##########
@@ -190,20 +191,6 @@ pub fn eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
         .or_else(|| temporal_coercion(lhs_type, rhs_type))
 }
 
-// coercion rules that assume an ordered set, such as "less than".

Review comment:
       👍 

##########
File path: datafusion/src/physical_plan/coercion_rule/binary_rule.rs
##########
@@ -0,0 +1,229 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Support the coercion rule for binary operation
+
+use crate::arrow::datatypes::DataType;
+use crate::error::{DataFusionError, Result};
+use crate::logical_plan::Operator;
+use crate::physical_plan::expressions::coercion::{
+    dictionary_coercion, eq_coercion, is_numeric, like_coercion, numerical_coercion,
+    string_coercion, temporal_coercion,
+};
+
+/// Coercion rules for all binary operators. Returns the output type
+/// of applying `op` to an argument of `lhs_type` and `rhs_type`.
+pub(crate) fn coerce_types(
+    lhs_type: &DataType,
+    op: &Operator,
+    rhs_type: &DataType,
+) -> Result<DataType> {
+    // This result MUST be compatible with `binary_coerce`
+    let result = match op {
+        Operator::And | Operator::Or => match (lhs_type, rhs_type) {
+            // logical binary boolean operators can only be evaluated in bools
+            (DataType::Boolean, DataType::Boolean) => Some(DataType::Boolean),
+            _ => None,
+        },
+        // logical equality operators have their own rules, and always return a boolean
+        Operator::Eq | Operator::NotEq => comparison_eq_coercion(lhs_type, rhs_type),
+        // order-comparison operators have their own rules
+        Operator::Lt | Operator::Gt | Operator::GtEq | Operator::LtEq => {
+            comparison_order_coercion(lhs_type, rhs_type)
+        }
+        // "like" operators operate on strings and always return a boolean
+        Operator::Like | Operator::NotLike => like_coercion(lhs_type, rhs_type),
+        // for math expressions, the final value of the coercion is also the return type
+        // because coercion favours higher information types
+        // TODO: support decimal data type
+        Operator::Plus
+        | Operator::Minus
+        | Operator::Modulo
+        | Operator::Divide
+        | Operator::Multiply => numerical_coercion(lhs_type, rhs_type),
+        Operator::RegexMatch
+        | Operator::RegexIMatch
+        | Operator::RegexNotMatch
+        | Operator::RegexNotIMatch => string_coercion(lhs_type, rhs_type),
+        Operator::IsDistinctFrom | Operator::IsNotDistinctFrom => {
+            eq_coercion(lhs_type, rhs_type)
+        }
+    };
+
+    // re-write the error message of failed coercions to include the operator's information
+    match result {
+        None => Err(DataFusionError::Plan(
+            format!(
+                "'{:?} {} {:?}' can't be evaluated because there isn't a common type to coerce the types to",
+                lhs_type, op, rhs_type
+            ),
+        )),
+        Some(t) => Ok(t)
+    }
+}
+
+fn comparison_eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
+    if lhs_type == rhs_type {
+        // same type => equality is possible
+        return Some(lhs_type.clone());
+    }
+    comparison_binary_numeric_coercion(lhs_type, rhs_type)
+        .or_else(|| dictionary_coercion(lhs_type, rhs_type))
+        .or_else(|| temporal_coercion(lhs_type, rhs_type))
+}
+
+fn comparison_order_coercion(
+    lhs_type: &DataType,
+    rhs_type: &DataType,
+) -> Option<DataType> {
+    if lhs_type == rhs_type {
+        // same type => all good
+        return Some(lhs_type.clone());
+    }
+    comparison_binary_numeric_coercion(lhs_type, rhs_type)
+        .or_else(|| string_coercion(lhs_type, rhs_type))
+        .or_else(|| dictionary_coercion(lhs_type, rhs_type))
+        .or_else(|| temporal_coercion(lhs_type, rhs_type))
+}
+
+fn comparison_binary_numeric_coercion(
+    lhs_type: &DataType,
+    rhs_type: &DataType,
+) -> Option<DataType> {
+    use arrow::datatypes::DataType::*;
+    if !is_numeric(lhs_type) || !is_numeric(rhs_type) {
+        return None;
+    };
+
+    // same type => all good
+    if lhs_type == rhs_type {
+        return Some(lhs_type.clone());
+    }
+
+    // these are ordered from most informative to least informative so
+    // that the coercion removes the least amount of information
+    match (lhs_type, rhs_type) {
+        // support decimal data type for comparison operation
+        (Decimal(p1, s1), Decimal(p2, s2)) => Some(Decimal(*p1.max(p2), *s1.max(s2))),
+        (Decimal(_, _), _) => get_comparison_common_decimal_type(lhs_type, rhs_type),
+        (_, Decimal(_, _)) => get_comparison_common_decimal_type(rhs_type, lhs_type),
+        (Float64, _) | (_, Float64) => Some(Float64),
+        (_, Float32) | (Float32, _) => Some(Float32),
+        (Int64, _) | (_, Int64) => Some(Int64),
+        (Int32, _) | (_, Int32) => Some(Int32),
+        (Int16, _) | (_, Int16) => Some(Int16),
+        (Int8, _) | (_, Int8) => Some(Int8),
+        (UInt64, _) | (_, UInt64) => Some(UInt64),
+        (UInt32, _) | (_, UInt32) => Some(UInt32),
+        (UInt16, _) | (_, UInt16) => Some(UInt16),
+        (UInt8, _) | (_, UInt8) => Some(UInt8),
+        _ => None,
+    }
+}
+
+fn get_comparison_common_decimal_type(
+    decimal_type: &DataType,
+    other_type: &DataType,
+) -> Option<DataType> {
+    let other_decimal_type = &match other_type {
+        // This conversion rule is from spark
+        // https://github.com/apache/spark/blob/1c81ad20296d34f137238dadd67cc6ae405944eb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala#L127

Review comment:
       thank you for the reference

##########
File path: datafusion/src/physical_plan/expressions/binary.rs
##########
@@ -1907,4 +2226,348 @@ mod tests {
             .collect();
         assert_eq!(result.as_ref(), &expected);
     }
+
+    fn create_decimal_array(
+        array: &[Option<i128>],
+        precision: usize,
+        scale: usize,
+    ) -> Result<DecimalArray> {
+        let mut decimal_builder = DecimalBuilder::new(array.len(), precision, scale);
+        for value in array {
+            match value {
+                None => {
+                    decimal_builder.append_null()?;
+                }
+                Some(v) => {
+                    decimal_builder.append_value(*v)?;
+                }
+            }
+        }
+        Ok(decimal_builder.finish())
+    }
+
+    #[test]
+    fn comparison_decimal_op_test() -> Result<()> {
+        let value_i128: i128 = 123;
+        let decimal_array = create_decimal_array(
+            &[
+                Some(value_i128),
+                None,
+                Some(value_i128 - 1),
+                Some(value_i128 + 1),
+            ],
+            25,
+            3,
+        )?;
+        // eq: array = i128
+        let result = eq_decimal_scalar(&decimal_array, value_i128)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(true), None, Some(false), Some(false)]),
+            result
+        );
+        // neq: array != i128
+        let result = neq_decimal_scalar(&decimal_array, value_i128)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(false), None, Some(true), Some(true)]),
+            result
+        );
+        // lt: array < i128
+        let result = lt_decimal_scalar(&decimal_array, value_i128)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(false), None, Some(true), Some(false)]),
+            result
+        );
+        // lt_eq: array <= i128
+        let result = lt_eq_decimal_scalar(&decimal_array, value_i128)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(true), None, Some(true), Some(false)]),
+            result
+        );
+        // gt: array > i128
+        let result = gt_decimal_scalar(&decimal_array, value_i128)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(false), None, Some(false), Some(true)]),
+            result
+        );
+        // gt_eq: array >= i128
+        let result = gt_eq_decimal_scalar(&decimal_array, value_i128)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]),
+            result
+        );
+
+        let left_decimal_array = decimal_array;
+        let right_decimal_array = create_decimal_array(
+            &[
+                Some(value_i128 - 1),
+                Some(value_i128),
+                Some(value_i128 + 1),
+                Some(value_i128 + 1),
+            ],
+            25,
+            3,
+        )?;
+        // eq: left == right
+        let result = eq_decimal(&left_decimal_array, &right_decimal_array)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(false), None, Some(false), Some(true)]),
+            result
+        );
+        // neq: left != right
+        let result = neq_decimal(&left_decimal_array, &right_decimal_array)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(true), None, Some(true), Some(false)]),
+            result
+        );
+        // lt: left < right
+        let result = lt_decimal(&left_decimal_array, &right_decimal_array)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(false), None, Some(true), Some(false)]),
+            result
+        );
+        // lt_eq: left <= right
+        let result = lt_eq_decimal(&left_decimal_array, &right_decimal_array)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(false), None, Some(true), Some(true)]),
+            result
+        );
+        // gt: left > right
+        let result = gt_decimal(&left_decimal_array, &right_decimal_array)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(true), None, Some(false), Some(false)]),
+            result
+        );
+        // gt_eq: left >= right
+        let result = gt_eq_decimal(&left_decimal_array, &right_decimal_array)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]),
+            result
+        );
+        // is_distinct: left distinct right
+        let result = is_distinct_from_decimal(&left_decimal_array, &right_decimal_array)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(true), Some(true), Some(true), Some(false)]),
+            result
+        );
+        // is_distinct: left distinct right
+        let result =
+            is_not_distinct_from_decimal(&left_decimal_array, &right_decimal_array)?;
+        assert_eq!(
+            BooleanArray::from(vec![Some(false), Some(false), Some(false), Some(true)]),
+            result
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn comparison_decimal_expr_test() -> Result<()> {
+        let decimal_scalar = ScalarValue::Decimal128(Some(123_456), 10, 3);
+        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, true)]));
+        // scalar == array
+        apply_logic_op_scalar_arr(
+            &schema,
+            &decimal_scalar,
+            &(Arc::new(Int64Array::from(vec![Some(124), None])) as ArrayRef),
+            Operator::Eq,
+            &BooleanArray::from(vec![Some(false), None]),
+        )
+        .unwrap();
+
+        // array != scalar
+        apply_logic_op_arr_scalar(
+            &schema,
+            &(Arc::new(Int64Array::from(vec![Some(123), None, Some(1)])) as ArrayRef),
+            &decimal_scalar,
+            Operator::NotEq,
+            &BooleanArray::from(vec![Some(true), None, Some(true)]),
+        )
+        .unwrap();
+
+        // array < scalar
+        apply_logic_op_arr_scalar(
+            &schema,
+            &(Arc::new(Int64Array::from(vec![Some(123), None, Some(124)])) as ArrayRef),
+            &decimal_scalar,
+            Operator::Lt,
+            &BooleanArray::from(vec![Some(true), None, Some(false)]),
+        )
+        .unwrap();
+
+        // array > scalar
+        apply_logic_op_arr_scalar(
+            &schema,
+            &(Arc::new(Int64Array::from(vec![Some(123), None, Some(124)])) as ArrayRef),
+            &decimal_scalar,
+            Operator::Gt,
+            &BooleanArray::from(vec![Some(false), None, Some(true)]),
+        )
+        .unwrap();
+
+        let schema =
+            Arc::new(Schema::new(vec![Field::new("a", DataType::Float64, true)]));
+        // array == scalar
+        apply_logic_op_arr_scalar(
+            &schema,
+            &(Arc::new(Float64Array::from(vec![Some(123.456), None, Some(123.457)]))
+                as ArrayRef),
+            &decimal_scalar,
+            Operator::Eq,
+            &BooleanArray::from(vec![Some(true), None, Some(false)]),
+        )
+        .unwrap();
+
+        // array <= scalar
+        apply_logic_op_arr_scalar(
+            &schema,
+            &(Arc::new(Float64Array::from(vec![
+                Some(123.456),
+                None,
+                Some(123.457),
+                Some(123.45),
+            ])) as ArrayRef),
+            &decimal_scalar,
+            Operator::LtEq,
+            &BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]),
+        )
+        .unwrap();
+        // array >= scalar
+        apply_logic_op_arr_scalar(
+            &schema,
+            &(Arc::new(Float64Array::from(vec![
+                Some(123.456),
+                None,
+                Some(123.457),
+                Some(123.45),
+            ])) as ArrayRef),
+            &decimal_scalar,
+            Operator::GtEq,
+            &BooleanArray::from(vec![Some(true), None, Some(true), Some(false)]),
+        )
+        .unwrap();
+
+        // compare decimal array with other array type
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int64, true),
+            Field::new("b", DataType::Decimal(10, 0), true),
+        ]));
+
+        let value: i64 = 123;
+
+        let decimal_array = Arc::new(create_decimal_array(
+            &[
+                Some(value as i128),
+                None,
+                Some((value - 1) as i128),
+                Some((value + 1) as i128),
+            ],
+            10,
+            0,
+        )?) as ArrayRef;
+
+        let int64_array = Arc::new(Int64Array::from(vec![
+            Some(value),
+            Some(value - 1),
+            Some(value),
+            Some(value + 1),
+        ])) as ArrayRef;
+
+        // eq: int64array == decimal array
+        apply_logic_op(
+            &schema,
+            &int64_array,
+            &decimal_array,
+            Operator::Eq,
+            BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]),
+        )
+        .unwrap();
+        // neq: int64array != decimal array
+        apply_logic_op(
+            &schema,
+            &int64_array,
+            &decimal_array,
+            Operator::NotEq,
+            BooleanArray::from(vec![Some(false), None, Some(true), Some(false)]),
+        )
+        .unwrap();
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Float64, true),
+            Field::new("b", DataType::Decimal(10, 2), true),
+        ]));
+
+        let value: i128 = 123;
+        let decimal_array = Arc::new(create_decimal_array(
+            &[
+                Some(value as i128), // 1.23
+                None,
+                Some((value - 1) as i128), // 1.22
+                Some((value + 1) as i128), // 1.24
+            ],
+            10,
+            2,
+        )?) as ArrayRef;
+        let float64_array = Arc::new(Float64Array::from(vec![
+            Some(1.23),
+            Some(1.22),
+            Some(1.23),
+            Some(1.24),
+        ])) as ArrayRef;
+        // lt: float64array < decimal array
+        apply_logic_op(
+            &schema,
+            &float64_array,
+            &decimal_array,
+            Operator::Lt,
+            BooleanArray::from(vec![Some(false), None, Some(false), Some(false)]),
+        )
+        .unwrap();
+        // lt_eq: float64array <= decimal array
+        apply_logic_op(
+            &schema,
+            &float64_array,
+            &decimal_array,
+            Operator::LtEq,
+            BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]),
+        )
+        .unwrap();
+        // gt: float64array > decimal array
+        apply_logic_op(
+            &schema,
+            &float64_array,
+            &decimal_array,
+            Operator::Gt,
+            BooleanArray::from(vec![Some(false), None, Some(true), Some(false)]),
+        )
+        .unwrap();
+        apply_logic_op(
+            &schema,
+            &float64_array,
+            &decimal_array,
+            Operator::GtEq,
+            BooleanArray::from(vec![Some(true), None, Some(true), Some(true)]),
+        )
+        .unwrap();
+        // is distinct: float64array is distinct decimal array
+        // TODO: now we do not refactor the `is distinct or is not distinct` rule of coercion.

Review comment:
       maybe this is worth tracking as a ticket so we don't forget

##########
File path: datafusion/src/physical_plan/expressions/binary.rs
##########
@@ -79,6 +76,193 @@ fn is_not_distinct_from_bool(
         .collect())
 }
 
+// TODO add iter for decimal array
+// TODO move this to arrow-rs

Review comment:
       I think it is a good idea to put the kernels into DataFusion first and then port to arrow 👍  There are some tricks to make them faster but that will be good discussions to have in arrow

##########
File path: datafusion/src/physical_plan/expressions/binary.rs
##########
@@ -79,6 +76,193 @@ fn is_not_distinct_from_bool(
         .collect())
 }
 
+// TODO add iter for decimal array
+// TODO move this to arrow-rs
+// https://github.com/apache/arrow-rs/issues/1083
+pub(super) fn eq_decimal_scalar(
+    left: &DecimalArray,
+    right: i128,
+) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) == right)?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+pub(super) fn eq_decimal(
+    left: &DecimalArray,
+    right: &DecimalArray,
+) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) || right.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) == right.value(i))?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn neq_decimal_scalar(left: &DecimalArray, right: i128) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) != right)?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn neq_decimal(left: &DecimalArray, right: &DecimalArray) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) || right.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) != right.value(i))?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn lt_decimal_scalar(left: &DecimalArray, right: i128) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) < right)?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn lt_decimal(left: &DecimalArray, right: &DecimalArray) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) || right.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) < right.value(i))?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn lt_eq_decimal_scalar(left: &DecimalArray, right: i128) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) <= right)?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn lt_eq_decimal(left: &DecimalArray, right: &DecimalArray) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) || right.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) <= right.value(i))?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn gt_decimal_scalar(left: &DecimalArray, right: i128) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) > right)?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn gt_decimal(left: &DecimalArray, right: &DecimalArray) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) || right.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) > right.value(i))?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn gt_eq_decimal_scalar(left: &DecimalArray, right: i128) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) >= right)?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn gt_eq_decimal(left: &DecimalArray, right: &DecimalArray) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) || right.is_null(i) {
+            bool_builder.append_null()?;
+        } else {
+            bool_builder.append_value(left.value(i) >= right.value(i))?;
+        }
+    }
+    Ok(bool_builder.finish())
+}
+
+fn is_distinct_from_decimal(
+    left: &DecimalArray,
+    right: &DecimalArray,
+) -> Result<BooleanArray> {
+    let mut bool_builder = BooleanBuilder::new(left.len());
+    for i in 0..left.len() {
+        if left.is_null(i) && right.is_null(i) {
+            bool_builder.append_value(false)?;
+        } else if left.is_null(i) || right.is_null(i) {
+            bool_builder.append_value(true)?;
+        } else {
+            bool_builder.append_value(left.value(i) != right.value(i))?;

Review comment:
       FWIW you can write this kind of structure in rust like
   
   
   ```suggestion
           match (left.is_null(i), right.is_null(i)) {
               (true, true) => bool_builder.append_value(false)?,
               (true, false) | (false | true) => bool_builder.append_value(true)?;
               _ => bool_builder.append_value(left.value(i) != right.value(i))?;
           }
   ```
   
   Where the benefit is that the compiler will ensure you all your branches are reachable and you have covered all the cases. 
   
   I don't think this needs to be changed, I just wanted to point it out while I was working through this PR

##########
File path: datafusion/src/physical_plan/expressions/binary.rs
##########
@@ -1857,22 +2191,7 @@ mod tests {
         ]
         .iter()
         .collect();
-        apply_logic_op(schema, a, b, Operator::IsNotDistinctFrom, expected).unwrap();
-    }
-
-    #[test]

Review comment:
       this test was moved, for anyone following along




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org