You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/09/09 13:39:32 UTC
[arrow-datafusion] branch main updated: Remove implicit interval type coercion from ScalarValue comparison (#7514)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 7b9bb08c67 Remove implicit interval type coercion from ScalarValue comparison (#7514)
7b9bb08c67 is described below
commit 7b9bb08c67a408f8a3dedc3ac5e0b091ba486c69
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Sat Sep 9 14:39:24 2023 +0100
Remove implicit interval type coercion from ScalarValue comparison (#7514)
---
datafusion/common/src/scalar.rs | 210 +---------------------------------------
1 file changed, 3 insertions(+), 207 deletions(-)
diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs
index 28452c6a31..88e0f3a301 100644
--- a/datafusion/common/src/scalar.rs
+++ b/datafusion/common/src/scalar.rs
@@ -47,14 +47,6 @@ use arrow::{
};
use arrow_array::{ArrowNativeTypeOp, Scalar};
-// Constants we use throughout this file:
-const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
-const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
-const SECS_IN_ONE_MONTH: i64 = 2_592_000; // assuming 30 days.
-const MILLISECS_IN_ONE_MONTH: i64 = 2_592_000_000; // assuming 30 days.
-const MICROSECS_IN_ONE_MONTH: i64 = 2_592_000_000_000; // assuming 30 days.
-const NANOSECS_IN_ONE_MONTH: i128 = 2_592_000_000_000_000; // assuming 30 days.
-
/// Represents a dynamically typed, nullable single value.
/// This is the single-valued counter-part to arrow's [`Array`].
///
@@ -237,28 +229,10 @@ impl PartialEq for ScalarValue {
(DurationNanosecond(v1), DurationNanosecond(v2)) => v1.eq(v2),
(DurationNanosecond(_), _) => false,
(IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2),
- (IntervalYearMonth(v1), IntervalDayTime(v2)) => {
- ym_to_milli(v1).eq(&dt_to_milli(v2))
- }
- (IntervalYearMonth(v1), IntervalMonthDayNano(v2)) => {
- ym_to_nano(v1).eq(&mdn_to_nano(v2))
- }
(IntervalYearMonth(_), _) => false,
(IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2),
- (IntervalDayTime(v1), IntervalYearMonth(v2)) => {
- dt_to_milli(v1).eq(&ym_to_milli(v2))
- }
- (IntervalDayTime(v1), IntervalMonthDayNano(v2)) => {
- dt_to_nano(v1).eq(&mdn_to_nano(v2))
- }
(IntervalDayTime(_), _) => false,
(IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2),
- (IntervalMonthDayNano(v1), IntervalYearMonth(v2)) => {
- mdn_to_nano(v1).eq(&ym_to_nano(v2))
- }
- (IntervalMonthDayNano(v1), IntervalDayTime(v2)) => {
- mdn_to_nano(v1).eq(&dt_to_nano(v2))
- }
(IntervalMonthDayNano(_), _) => false,
(Struct(v1, t1), Struct(v2, t2)) => v1.eq(v2) && t1.eq(t2),
(Struct(_, _), _) => false,
@@ -377,28 +351,10 @@ impl PartialOrd for ScalarValue {
}
(TimestampNanosecond(_, _), _) => None,
(IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2),
- (IntervalYearMonth(v1), IntervalDayTime(v2)) => {
- ym_to_milli(v1).partial_cmp(&dt_to_milli(v2))
- }
- (IntervalYearMonth(v1), IntervalMonthDayNano(v2)) => {
- ym_to_nano(v1).partial_cmp(&mdn_to_nano(v2))
- }
(IntervalYearMonth(_), _) => None,
(IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2),
- (IntervalDayTime(v1), IntervalYearMonth(v2)) => {
- dt_to_milli(v1).partial_cmp(&ym_to_milli(v2))
- }
- (IntervalDayTime(v1), IntervalMonthDayNano(v2)) => {
- dt_to_nano(v1).partial_cmp(&mdn_to_nano(v2))
- }
(IntervalDayTime(_), _) => None,
(IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2),
- (IntervalMonthDayNano(v1), IntervalYearMonth(v2)) => {
- mdn_to_nano(v1).partial_cmp(&ym_to_nano(v2))
- }
- (IntervalMonthDayNano(v1), IntervalDayTime(v2)) => {
- mdn_to_nano(v1).partial_cmp(&dt_to_nano(v2))
- }
(IntervalMonthDayNano(_), _) => None,
(DurationSecond(v1), DurationSecond(v2)) => v1.partial_cmp(v2),
(DurationSecond(_), _) => None,
@@ -431,122 +387,6 @@ impl PartialOrd for ScalarValue {
}
}
-/// This function computes the duration (in milliseconds) of the given
-/// year-month-interval.
-#[inline]
-pub fn ym_to_sec(val: &Option<i32>) -> Option<i64> {
- val.map(|value| (value as i64) * SECS_IN_ONE_MONTH)
-}
-
-/// This function computes the duration (in milliseconds) of the given
-/// year-month-interval.
-#[inline]
-pub fn ym_to_milli(val: &Option<i32>) -> Option<i64> {
- val.map(|value| (value as i64) * MILLISECS_IN_ONE_MONTH)
-}
-
-/// This function computes the duration (in milliseconds) of the given
-/// year-month-interval.
-#[inline]
-pub fn ym_to_micro(val: &Option<i32>) -> Option<i64> {
- val.map(|value| (value as i64) * MICROSECS_IN_ONE_MONTH)
-}
-
-/// This function computes the duration (in nanoseconds) of the given
-/// year-month-interval.
-#[inline]
-pub fn ym_to_nano(val: &Option<i32>) -> Option<i128> {
- val.map(|value| (value as i128) * NANOSECS_IN_ONE_MONTH)
-}
-
-/// This function computes the duration (in seconds) of the given
-/// daytime-interval.
-#[inline]
-pub fn dt_to_sec(val: &Option<i64>) -> Option<i64> {
- val.map(|val| {
- let (days, millis) = IntervalDayTimeType::to_parts(val);
- (days as i64) * MILLISECS_IN_ONE_DAY + (millis as i64 / 1_000)
- })
-}
-
-/// This function computes the duration (in milliseconds) of the given
-/// daytime-interval.
-#[inline]
-pub fn dt_to_milli(val: &Option<i64>) -> Option<i64> {
- val.map(|val| {
- let (days, millis) = IntervalDayTimeType::to_parts(val);
- (days as i64) * MILLISECS_IN_ONE_DAY + (millis as i64)
- })
-}
-
-/// This function computes the duration (in microseconds) of the given
-/// daytime-interval.
-#[inline]
-pub fn dt_to_micro(val: &Option<i64>) -> Option<i128> {
- val.map(|val| {
- let (days, millis) = IntervalDayTimeType::to_parts(val);
- (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + (millis as i128) * 1_000
- })
-}
-
-/// This function computes the duration (in nanoseconds) of the given
-/// daytime-interval.
-#[inline]
-pub fn dt_to_nano(val: &Option<i64>) -> Option<i128> {
- val.map(|val| {
- let (days, millis) = IntervalDayTimeType::to_parts(val);
- (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + (millis as i128) * 1_000_000
- })
-}
-
-/// This function computes the duration (in seconds) of the given
-/// month-day-nano-interval. Assumes a month is 30 days long.
-#[inline]
-pub fn mdn_to_sec(val: &Option<i128>) -> Option<i128> {
- val.map(|val| {
- let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val);
- (months as i128) * NANOSECS_IN_ONE_MONTH
- + (days as i128) * (NANOSECS_IN_ONE_DAY as i128)
- + (nanos as i128) / 1_000_000_000
- })
-}
-
-/// This function computes the duration (in milliseconds) of the given
-/// month-day-nano-interval. Assumes a month is 30 days long.
-#[inline]
-pub fn mdn_to_milli(val: &Option<i128>) -> Option<i128> {
- val.map(|val| {
- let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val);
- (months as i128) * NANOSECS_IN_ONE_MONTH
- + (days as i128) * (NANOSECS_IN_ONE_DAY as i128)
- + (nanos as i128) / 1_000_000
- })
-}
-
-/// This function computes the duration (in microseconds) of the given
-/// month-day-nano-interval. Assumes a month is 30 days long.
-#[inline]
-pub fn mdn_to_micro(val: &Option<i128>) -> Option<i128> {
- val.map(|val| {
- let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val);
- (months as i128) * NANOSECS_IN_ONE_MONTH
- + (days as i128) * (NANOSECS_IN_ONE_DAY as i128)
- + (nanos as i128) / 1_000
- })
-}
-
-/// This function computes the duration (in nanoseconds) of the given
-/// month-day-nano-interval. Assumes a month is 30 days long.
-#[inline]
-pub fn mdn_to_nano(val: &Option<i128>) -> Option<i128> {
- val.map(|val| {
- let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val);
- (months as i128) * NANOSECS_IN_ONE_MONTH
- + (days as i128) * (NANOSECS_IN_ONE_DAY as i128)
- + (nanos as i128)
- })
-}
-
impl Eq for ScalarValue {}
//Float wrapper over f32/f64. Just because we cannot build std::hash::Hash for floats directly we have to do it through type wrapper
@@ -4106,53 +3946,6 @@ mod tests {
])),
None
);
- // Different type of intervals can be compared.
- assert!(
- IntervalYearMonth(Some(IntervalYearMonthType::make_value(1, 2)))
- < IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(
- 14, 0, 1
- ))),
- );
- assert!(
- IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 4)))
- >= IntervalDayTime(Some(IntervalDayTimeType::make_value(119, 1)))
- );
- assert!(
- IntervalDayTime(Some(IntervalDayTimeType::make_value(12, 86_399_999)))
- >= IntervalDayTime(Some(IntervalDayTimeType::make_value(12, 0)))
- );
- assert!(
- IntervalYearMonth(Some(IntervalYearMonthType::make_value(2, 12)))
- == IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(
- 36, 0, 0
- ))),
- );
- assert!(
- IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 0)))
- != IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 1)))
- );
- assert!(
- IntervalYearMonth(Some(IntervalYearMonthType::make_value(1, 4)))
- == IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 16))),
- );
- assert!(
- IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 3)))
- > IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(
- 2,
- 28,
- 999_999_999
- ))),
- );
- assert!(
- IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 1)))
- > IntervalDayTime(Some(IntervalDayTimeType::make_value(29, 9_999))),
- );
- assert!(
- IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(1, 12, 34)))
- > IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(
- 0, 142, 34
- )))
- );
}
#[test]
@@ -5197,6 +4990,9 @@ mod tests {
}
fn get_random_intervals(sample_size: u64) -> Vec<ScalarValue> {
+ const MILLISECS_IN_ONE_DAY: i64 = 86_400_000;
+ const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000;
+
let vector_size = sample_size;
let mut intervals = vec![];
let mut rng = rand::thread_rng();