You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/01/13 07:50:12 UTC
[arrow-rs] branch master updated: Make consistent behavior on zeros equality on floating point types (#3510)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new d49cd21f9 Make consistent behavior on zeros equality on floating point types (#3510)
d49cd21f9 is described below
commit d49cd21f9c5ac27961041f7a2a9dbf4cea9708de
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Thu Jan 12 23:50:05 2023 -0800
Make consistent behavior on zeros equality on floating point types (#3510)
* Treat positive and negative float zeros as equal
* Update doc
* Add test
* Make build_compare consistent with comparison kernels
---
arrow-ord/src/comparison.rs | 67 +++++++++++++++++++++++++++++++++++++++++++++
arrow-ord/src/ord.rs | 36 +++++-------------------
2 files changed, 74 insertions(+), 29 deletions(-)
diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs
index 80c8b6b1c..4754aeb1f 100644
--- a/arrow-ord/src/comparison.rs
+++ b/arrow-ord/src/comparison.rs
@@ -628,6 +628,8 @@ macro_rules! dyn_compare_utf8_scalar {
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
where
@@ -647,6 +649,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn lt_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
where
@@ -666,6 +670,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn lt_eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
where
@@ -685,6 +691,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn gt_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
where
@@ -704,6 +712,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn gt_eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
where
@@ -723,6 +733,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn neq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError>
where
@@ -2098,6 +2110,8 @@ where
///
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
@@ -2141,6 +2155,8 @@ pub fn eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arrow
///
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
@@ -2186,6 +2202,8 @@ pub fn neq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arro
///
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
@@ -2231,6 +2249,8 @@ pub fn lt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arrow
///
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
@@ -2278,6 +2298,8 @@ pub fn lt_eq_dyn(
///
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
@@ -2322,6 +2344,8 @@ pub fn gt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arrow
///
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
@@ -2366,6 +2390,8 @@ pub fn gt_eq_dyn(
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn eq<T>(
left: &PrimitiveArray<T>,
@@ -2386,6 +2412,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn eq_scalar<T>(
left: &PrimitiveArray<T>,
@@ -2418,6 +2446,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn neq<T>(
left: &PrimitiveArray<T>,
@@ -2438,6 +2468,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn neq_scalar<T>(
left: &PrimitiveArray<T>,
@@ -2459,6 +2491,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn lt<T>(
left: &PrimitiveArray<T>,
@@ -2480,6 +2514,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn lt_scalar<T>(
left: &PrimitiveArray<T>,
@@ -2501,6 +2537,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn lt_eq<T>(
left: &PrimitiveArray<T>,
@@ -2522,6 +2560,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn lt_eq_scalar<T>(
left: &PrimitiveArray<T>,
@@ -2543,6 +2583,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn gt<T>(
left: &PrimitiveArray<T>,
@@ -2564,6 +2606,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn gt_scalar<T>(
left: &PrimitiveArray<T>,
@@ -2585,6 +2629,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn gt_eq<T>(
left: &PrimitiveArray<T>,
@@ -2606,6 +2652,8 @@ where
/// If `simd` feature flag is not enabled:
/// For floating values like f32 and f64, this comparison produces an ordering in accordance to
/// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard.
+/// Note that totalOrder treats positive and negative zeros are different. If it is necessary
+/// to treat them as equal, please normalize zeros before calling this kernel.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn gt_eq_scalar<T>(
left: &PrimitiveArray<T>,
@@ -5828,6 +5876,25 @@ mod tests {
assert_eq!(e, r);
}
+ #[test]
+ #[cfg(not(feature = "simd"))]
+ fn test_floating_zeros() {
+ let a = Float32Array::from(vec![0.0_f32, -0.0]);
+ let b = Float32Array::from(vec![-0.0_f32, 0.0]);
+
+ let result = eq_dyn(&a, &b).unwrap();
+ let excepted = BooleanArray::from(vec![false, false]);
+ assert_eq!(excepted, result);
+
+ let result = eq_dyn_scalar(&a, 0.0).unwrap();
+ let excepted = BooleanArray::from(vec![true, false]);
+ assert_eq!(excepted, result);
+
+ let result = eq_dyn_scalar(&a, -0.0).unwrap();
+ let excepted = BooleanArray::from(vec![false, true]);
+ assert_eq!(excepted, result);
+ }
+
#[derive(Debug)]
struct ToType {}
diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs
index b7737c6de..00b6668ad 100644
--- a/arrow-ord/src/ord.rs
+++ b/arrow-ord/src/ord.rs
@@ -21,32 +21,21 @@ use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::ArrowNativeType;
use arrow_schema::{ArrowError, DataType};
-use num::Float;
use std::cmp::Ordering;
/// Compare the values at two arbitrary indices in two arrays.
pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>;
-/// compares two floats, placing NaNs at last
-fn cmp_nans_last<T: Float>(a: &T, b: &T) -> Ordering {
- match (a.is_nan(), b.is_nan()) {
- (true, true) => Ordering::Equal,
- (true, false) => Ordering::Greater,
- (false, true) => Ordering::Less,
- _ => a.partial_cmp(b).unwrap(),
- }
-}
-
fn compare_primitives<T: ArrowPrimitiveType>(
left: &dyn Array,
right: &dyn Array,
) -> DynComparator
where
- T::Native: Ord,
+ T::Native: ArrowNativeTypeOp,
{
let left: PrimitiveArray<T> = PrimitiveArray::from(left.data().clone());
let right: PrimitiveArray<T> = PrimitiveArray::from(right.data().clone());
- Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
+ Box::new(move |i, j| left.value(i).compare(right.value(j)))
}
fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator {
@@ -56,18 +45,6 @@ fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator {
Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
}
-fn compare_float<T: ArrowPrimitiveType>(
- left: &dyn Array,
- right: &dyn Array,
-) -> DynComparator
-where
- T::Native: Float,
-{
- let left: PrimitiveArray<T> = PrimitiveArray::from(left.data().clone());
- let right: PrimitiveArray<T> = PrimitiveArray::from(right.data().clone());
- Box::new(move |i, j| cmp_nans_last(&left.value(i), &right.value(j)))
-}
-
fn compare_string<T>(left: &dyn Array, right: &dyn Array) -> DynComparator
where
T: OffsetSizeTrait,
@@ -197,8 +174,8 @@ pub fn build_compare(
(Int16, Int16) => compare_primitives::<Int16Type>(left, right),
(Int32, Int32) => compare_primitives::<Int32Type>(left, right),
(Int64, Int64) => compare_primitives::<Int64Type>(left, right),
- (Float32, Float32) => compare_float::<Float32Type>(left, right),
- (Float64, Float64) => compare_float::<Float64Type>(left, right),
+ (Float32, Float32) => compare_primitives::<Float32Type>(left, right),
+ (Float64, Float64) => compare_primitives::<Float64Type>(left, right),
(Decimal128(_, _), Decimal128(_, _)) => {
compare_primitives::<Decimal128Type>(left, right)
}
@@ -372,6 +349,7 @@ pub mod tests {
let cmp = build_compare(&array, &array).unwrap();
assert_eq!(Ordering::Less, (cmp)(0, 1));
+ assert_eq!(Ordering::Equal, (cmp)(1, 1));
}
#[test]
@@ -380,8 +358,8 @@ pub mod tests {
let cmp = build_compare(&array, &array).unwrap();
- assert_eq!(Ordering::Equal, (cmp)(0, 1));
- assert_eq!(Ordering::Equal, (cmp)(1, 0));
+ assert_eq!(Ordering::Less, (cmp)(0, 1));
+ assert_eq!(Ordering::Greater, (cmp)(1, 0));
}
#[test]