You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/01/13 07:50:12 UTC

[arrow-rs] branch master updated: Make consistent behavior on zeros equality on floating point types (#3510)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new d49cd21f9 Make consistent behavior on zeros equality on floating point types (#3510)
d49cd21f9 is described below

commit d49cd21f9c5ac27961041f7a2a9dbf4cea9708de
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Thu Jan 12 23:50:05 2023 -0800

    Make consistent behavior on zeros equality on floating point types (#3510)
    
    * Treat positive and negative float zeros as equal
    
    * Update doc
    
    * Add test
    
    * Make build_compare consistent with comparison kernels
---
 arrow-ord/src/comparison.rs | 67 +++++++++++++++++++++++++++++++++++++++++++++
 arrow-ord/src/ord.rs        | 36 +++++-------------------
 2 files changed, 74 insertions(+), 29 deletions(-)

diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs
index 80c8b6b1c..4754aeb1f 100644
--- a/arrow-ord/src/comparison.rs
+++ b/arrow-ord/src/comparison.rs
@@ -628,6 +628,8 @@ macro_rules! dyn_compare_utf8_scalar {
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
 where
@@ -647,6 +649,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn lt_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
 where
@@ -666,6 +670,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn lt_eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
 where
@@ -685,6 +691,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn gt_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
 where
@@ -704,6 +712,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn gt_eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
 where
@@ -723,6 +733,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn neq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
 where
@@ -2098,6 +2110,8 @@ where
 ///
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 ///
 /// # Example
@@ -2141,6 +2155,8 @@ pub fn eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arrow
 ///
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 ///
 /// # Example
@@ -2186,6 +2202,8 @@ pub fn neq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arro
 ///
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 ///
 /// # Example
@@ -2231,6 +2249,8 @@ pub fn lt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arrow
 ///
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 ///
 /// # Example
@@ -2278,6 +2298,8 @@ pub fn lt_eq_dyn(
 ///
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 ///
 /// # Example
@@ -2322,6 +2344,8 @@ pub fn gt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arrow
 ///
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 ///
 /// # Example
@@ -2366,6 +2390,8 @@ pub fn gt_eq_dyn(
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn eq<T>(
     left: &PrimitiveArray<T>,
@@ -2386,6 +2412,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn eq_scalar<T>(
     left: &PrimitiveArray<T>,
@@ -2418,6 +2446,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn neq<T>(
     left: &PrimitiveArray<T>,
@@ -2438,6 +2468,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn neq_scalar<T>(
     left: &PrimitiveArray<T>,
@@ -2459,6 +2491,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn lt<T>(
     left: &PrimitiveArray<T>,
@@ -2480,6 +2514,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn lt_scalar<T>(
     left: &PrimitiveArray<T>,
@@ -2501,6 +2537,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn lt_eq<T>(
     left: &PrimitiveArray<T>,
@@ -2522,6 +2560,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn lt_eq_scalar<T>(
     left: &PrimitiveArray<T>,
@@ -2543,6 +2583,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn gt<T>(
     left: &PrimitiveArray<T>,
@@ -2564,6 +2606,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn gt_scalar<T>(
     left: &PrimitiveArray<T>,
@@ -2585,6 +2629,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn gt_eq<T>(
     left: &PrimitiveArray<T>,
@@ -2606,6 +2652,8 @@ where
 /// If `simd` feature flag is not enabled:
 /// For floating values like f32 and f64, this comparison produces an ordering in accordance to
 /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
 /// Please refer to `f32::total_cmp` and `f64::total_cmp`.
 pub fn gt_eq_scalar<T>(
     left: &PrimitiveArray<T>,
@@ -5828,6 +5876,25 @@ mod tests {
         assert_eq!(e, r);
     }
 
+    #[test]
+    #[cfg(not(feature = "simd"))]
+    fn test_floating_zeros() {
+        let a = Float32Array::from(vec![0.0_f32, -0.0]);
+        let b = Float32Array::from(vec![-0.0_f32, 0.0]);
+
+        let result = eq_dyn(&a, &b).unwrap();
+        let excepted = BooleanArray::from(vec![false, false]);
+        assert_eq!(excepted, result);
+
+        let result = eq_dyn_scalar(&a, 0.0).unwrap();
+        let excepted = BooleanArray::from(vec![true, false]);
+        assert_eq!(excepted, result);
+
+        let result = eq_dyn_scalar(&a, -0.0).unwrap();
+        let excepted = BooleanArray::from(vec![false, true]);
+        assert_eq!(excepted, result);
+    }
+
     #[derive(Debug)]
     struct ToType {}
 
diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs
index b7737c6de..00b6668ad 100644
--- a/arrow-ord/src/ord.rs
+++ b/arrow-ord/src/ord.rs
@@ -21,32 +21,21 @@ use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::{ArrowError, DataType};
-use num::Float;
 use std::cmp::Ordering;
 
 /// Compare the values at two arbitrary indices in two arrays.
 pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>;
 
-/// compares two floats, placing NaNs at last
-fn cmp_nans_last<T: Float>(a: &T, b: &T) -> Ordering {
-    match (a.is_nan(), b.is_nan()) {
-        (true, true) => Ordering::Equal,
-        (true, false) => Ordering::Greater,
-        (false, true) => Ordering::Less,
-        _ => a.partial_cmp(b).unwrap(),
-    }
-}
-
 fn compare_primitives<T: ArrowPrimitiveType>(
     left: &dyn Array,
     right: &dyn Array,
 ) -> DynComparator
 where
-    T::Native: Ord,
+    T::Native: ArrowNativeTypeOp,
 {
     let left: PrimitiveArray<T> = PrimitiveArray::from(left.data().clone());
     let right: PrimitiveArray<T> = PrimitiveArray::from(right.data().clone());
-    Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
+    Box::new(move |i, j| left.value(i).compare(right.value(j)))
 }
 
 fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator {
@@ -56,18 +45,6 @@ fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator {
     Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
 }
 
-fn compare_float<T: ArrowPrimitiveType>(
-    left: &dyn Array,
-    right: &dyn Array,
-) -> DynComparator
-where
-    T::Native: Float,
-{
-    let left: PrimitiveArray<T> = PrimitiveArray::from(left.data().clone());
-    let right: PrimitiveArray<T> = PrimitiveArray::from(right.data().clone());
-    Box::new(move |i, j| cmp_nans_last(&left.value(i), &right.value(j)))
-}
-
 fn compare_string<T>(left: &dyn Array, right: &dyn Array) -> DynComparator
 where
     T: OffsetSizeTrait,
@@ -197,8 +174,8 @@ pub fn build_compare(
         (Int16, Int16) => compare_primitives::<Int16Type>(left, right),
         (Int32, Int32) => compare_primitives::<Int32Type>(left, right),
         (Int64, Int64) => compare_primitives::<Int64Type>(left, right),
-        (Float32, Float32) => compare_float::<Float32Type>(left, right),
-        (Float64, Float64) => compare_float::<Float64Type>(left, right),
+        (Float32, Float32) => compare_primitives::<Float32Type>(left, right),
+        (Float64, Float64) => compare_primitives::<Float64Type>(left, right),
         (Decimal128(_, _), Decimal128(_, _)) => {
             compare_primitives::<Decimal128Type>(left, right)
         }
@@ -372,6 +349,7 @@ pub mod tests {
         let cmp = build_compare(&array, &array).unwrap();
 
         assert_eq!(Ordering::Less, (cmp)(0, 1));
+        assert_eq!(Ordering::Equal, (cmp)(1, 1));
     }
 
     #[test]
@@ -380,8 +358,8 @@ pub mod tests {
 
         let cmp = build_compare(&array, &array).unwrap();
 
-        assert_eq!(Ordering::Equal, (cmp)(0, 1));
-        assert_eq!(Ordering::Equal, (cmp)(1, 0));
+        assert_eq!(Ordering::Less, (cmp)(0, 1));
+        assert_eq!(Ordering::Greater, (cmp)(1, 0));
     }
 
     #[test]