You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ja...@apache.org on 2023/06/08 03:31:30 UTC

[arrow-datafusion] branch main updated: feat: type coercion support date - date (#6578)

This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 9c13a771f9 feat: type coercion support date - date (#6578)
9c13a771f9 is described below

commit 9c13a771f957327c1f9b467803e72df3c5e43c0e
Author: jakevin <ja...@gmail.com>
AuthorDate: Thu Jun 8 11:31:24 2023 +0800

    feat: type coercion support date - date (#6578)
---
 .../core/tests/sqllogictests/test_files/dates.slt  |  3 +-
 datafusion/expr/src/type_coercion/binary.rs        | 45 ++++++++++++----------
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/dates.slt b/datafusion/core/tests/sqllogictests/test_files/dates.slt
index bfeb92fd59..5b76739e95 100644
--- a/datafusion/core/tests/sqllogictests/test_files/dates.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/dates.slt
@@ -90,7 +90,8 @@ select i_item_desc from test
 where d3_date > now() + '5 days';
 
 # DATE minus DATE
-query error DataFusion error: Error during planning: Unsupported argument types\. Can not evaluate Date32 \- Date32
+# https://github.com/apache/arrow-rs/issues/4383
+query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nArrow error: Cast error: Cannot perform arithmetic operation between array of type Date32 and array of type Date32
 SELECT DATE '2023-04-09' - DATE '2023-04-02';
 
 # DATE minus Timestamp
diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs
index 86c5c15f14..f8a04de45b 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -55,8 +55,7 @@ fn mathematics_temporal_result_type(
         (Interval(_), Date64) => Some(rhs_type.clone()),
         (Date64, Interval(_)) => Some(lhs_type.clone()),
         // interval +/-
-        (Interval(YearMonth), Interval(YearMonth)) => Some(Interval(YearMonth)),
-        (Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
+        (Interval(l), Interval(h)) if l == h => Some(lhs_type.clone()),
         (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
         // timestamp - timestamp
         (Timestamp(Second, _), Timestamp(Second, _))
@@ -68,7 +67,10 @@ fn mathematics_temporal_result_type(
             Some(Interval(MonthDayNano))
         }
         (Timestamp(_, _), Timestamp(_, _)) => None,
-        // TODO: date minus date
+        // date - date
+        (Date32, Date32) => Some(Interval(DayTime)),
+        (Date64, Date64) => Some(Interval(MonthDayNano)),
+        (Date32, Date64) | (Date64, Date32) => Some(Interval(MonthDayNano)),
         // date - timestamp, timestamp - date
         (Date32, Timestamp(_, _))
         | (Timestamp(_, _), Date32)
@@ -733,10 +735,13 @@ fn is_time_with_valid_unit(datatype: DataType) -> bool {
 fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
     use arrow::datatypes::DataType::*;
     use arrow::datatypes::IntervalUnit::*;
+    use arrow::datatypes::TimeUnit::*;
+
+    if lhs_type == rhs_type {
+        return Some(lhs_type.clone());
+    }
     match (lhs_type, rhs_type) {
         // interval +/-
-        (Interval(YearMonth), Interval(YearMonth)) => Some(Interval(YearMonth)),
-        (Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
         (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
         (Date64, Date32) | (Date32, Date64) => Some(Date64),
         (Utf8, Date32) | (Date32, Utf8) => Some(Date32),
@@ -754,13 +759,13 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataTyp
             }
         }
         (Timestamp(_, tz), Utf8) | (Utf8, Timestamp(_, tz)) => {
-            Some(Timestamp(TimeUnit::Nanosecond, tz.clone()))
+            Some(Timestamp(Nanosecond, tz.clone()))
         }
         (Timestamp(_, None), Date32) | (Date32, Timestamp(_, None)) => {
-            Some(Timestamp(TimeUnit::Nanosecond, None))
+            Some(Timestamp(Nanosecond, None))
         }
         (Timestamp(_, _tz), Date32) | (Date32, Timestamp(_, _tz)) => {
-            Some(Timestamp(TimeUnit::Nanosecond, None))
+            Some(Timestamp(Nanosecond, None))
         }
         (Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => {
             let tz = match (lhs_tz, rhs_tz) {
@@ -778,18 +783,18 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataTyp
             };
 
             let unit = match (lhs_unit, rhs_unit) {
-                (TimeUnit::Second, TimeUnit::Millisecond) => TimeUnit::Second,
-                (TimeUnit::Second, TimeUnit::Microsecond) => TimeUnit::Second,
-                (TimeUnit::Second, TimeUnit::Nanosecond) => TimeUnit::Second,
-                (TimeUnit::Millisecond, TimeUnit::Second) => TimeUnit::Second,
-                (TimeUnit::Millisecond, TimeUnit::Microsecond) => TimeUnit::Millisecond,
-                (TimeUnit::Millisecond, TimeUnit::Nanosecond) => TimeUnit::Millisecond,
-                (TimeUnit::Microsecond, TimeUnit::Second) => TimeUnit::Second,
-                (TimeUnit::Microsecond, TimeUnit::Millisecond) => TimeUnit::Millisecond,
-                (TimeUnit::Microsecond, TimeUnit::Nanosecond) => TimeUnit::Microsecond,
-                (TimeUnit::Nanosecond, TimeUnit::Second) => TimeUnit::Second,
-                (TimeUnit::Nanosecond, TimeUnit::Millisecond) => TimeUnit::Millisecond,
-                (TimeUnit::Nanosecond, TimeUnit::Microsecond) => TimeUnit::Microsecond,
+                (Second, Millisecond) => Second,
+                (Second, Microsecond) => Second,
+                (Second, Nanosecond) => Second,
+                (Millisecond, Second) => Second,
+                (Millisecond, Microsecond) => Millisecond,
+                (Millisecond, Nanosecond) => Millisecond,
+                (Microsecond, Second) => Second,
+                (Microsecond, Millisecond) => Millisecond,
+                (Microsecond, Nanosecond) => Microsecond,
+                (Nanosecond, Second) => Second,
+                (Nanosecond, Millisecond) => Millisecond,
+                (Nanosecond, Microsecond) => Microsecond,
                 (l, r) => {
                     assert_eq!(l, r);
                     l.clone()