Posted to commits@spark.apache.org by ge...@apache.org on 2021/08/12 17:04:27 UTC

[spark] branch branch-3.2 updated: [SPARK-36445][SQL][FOLLOWUP] ANSI type coercion: revisit promoting string literals in datetime expressions

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 3785738  [SPARK-36445][SQL][FOLLOWUP] ANSI type coercion: revisit promoting string literals in datetime expressions
3785738 is described below

commit 37857388389e34ebec5215ba29b5b78dac1cfb2c
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Fri Aug 13 01:02:34 2021 +0800

    [SPARK-36445][SQL][FOLLOWUP] ANSI type coercion: revisit promoting string literals in datetime expressions
    
    ### What changes were proposed in this pull request?
    
    1. Promote more string literals in subtractions. In the ANSI type coercion rule, we already promote
    ```
    string - timestamp => cast(string as timestamp) - timestamp
    ```
    This PR promotes the following string literals as well:
    ```
    string - date => cast(string as date) - date
    date - string => date - cast(string as date)
    timestamp - string => timestamp - cast(string as timestamp)
    ```
    It is straightforward to cast the string literal to the data type of the other side of the subtraction.
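    
    As a sketch of the resulting behavior, the example below is taken from the updated
    `ansi/timestamp.sql.out` golden file in this patch; the string literal is now cast to
    TIMESTAMP instead of failing analysis:
    ```
    -- the string literal on the right side of the subtraction is cast to TIMESTAMP
    select timestamp'2011-11-11 11:11:11' - '2011-11-11 11:11:10';
    -- result: 0 00:00:01.000000000 (interval day to second)
    ```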
    
    2. Merge the string literal promotion logic from the rule `StringLiteralCoercion` into the ANSI type coercion rule (see the sketch below):
    ```
    date_sub(date, string) => date_sub(date, cast(string as int))
    date_add(date, string) => date_add(date, cast(string as int))
    ```
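    
    As a sketch: the string argument is now cast to INT by this rule. The first query and its
    result are illustrative assumptions; the second query and its error come from the updated
    `ansi/date.sql.out` golden file:
    ```
    -- assumption: an integral string is cast to INT, so this returns DATE '2011-11-12'
    select date_add(date'2011-11-11', '1');
    -- a non-integral string now fails at the ANSI cast with java.lang.NumberFormatException
    -- ("invalid input syntax for type numeric: 1.2") instead of an AnalysisException
    select date_add('2011-11-11', '1.2');
    ```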
    
    ### Why are the changes needed?
    
    1. Promote the string literal in a subtraction to the data type of the other side. This is straightforward and consistent with PostgreSQL.
    2. Centralize all the string literal promotion logic in the ANSI type coercion rule.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, the new ANSI type coercion rules are not released yet.
    
    ### How was this patch tested?
    
    Existing UT
    
    Closes #33724 from gengliangwang/datetimeTypeCoercion.
    
    Authored-by: Gengliang Wang <ge...@apache.org>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
    (cherry picked from commit 48e333af5473f849274c4247313ea7b51de70faf)
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 .../spark/sql/catalyst/analysis/AnsiTypeCoercion.scala     | 14 ++++++++++++--
 sql/core/src/test/resources/sql-tests/inputs/date.sql      |  4 +++-
 sql/core/src/test/resources/sql-tests/inputs/timestamp.sql |  4 +++-
 .../src/test/resources/sql-tests/results/ansi/date.sql.out |  8 ++++----
 .../resources/sql-tests/results/ansi/timestamp.sql.out     |  5 ++---
 .../sql-tests/results/timestampNTZ/timestamp-ansi.sql.out  |  5 ++---
 6 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index c5f0845..61724ae 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -91,7 +91,6 @@ object AnsiTypeCoercion extends TypeCoercionBase {
       ImplicitTypeCasts ::
       DateTimeOperations ::
       WindowFrameCoercion ::
-      StringLiteralCoercion ::
       GetDateFieldOperations:: Nil) :: Nil
 
   val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
@@ -297,12 +296,23 @@ object AnsiTypeCoercion extends TypeCoercionBase {
 
       case d @ DateAdd(left @ StringType(), _) if left.foldable =>
         d.copy(startDate = Cast(d.startDate, DateType))
+      case d @ DateAdd(_, right @ StringType()) if right.foldable =>
+        d.copy(days = Cast(right, IntegerType))
       case d @ DateSub(left @ StringType(), _) if left.foldable =>
         d.copy(startDate = Cast(d.startDate, DateType))
+      case d @ DateSub(_, right @ StringType()) if right.foldable =>
+        d.copy(days = Cast(right, IntegerType))
+
+      case s @ SubtractDates(left @ StringType(), _, _) if left.foldable =>
+        s.copy(left = Cast(s.left, DateType))
+      case s @ SubtractDates(_, right @ StringType(), _) if right.foldable =>
+        s.copy(right = Cast(s.right, DateType))
       case t @ TimeAdd(left @ StringType(), _, _) if left.foldable =>
         t.copy(start = Cast(t.start, TimestampType))
       case t @ SubtractTimestamps(left @ StringType(), _, _, _) if left.foldable =>
-        t.copy(left = Cast(t.left, TimestampType))
+        t.copy(left = Cast(t.left, t.right.dataType))
+      case t @ SubtractTimestamps(_, right @ StringType(), _, _) if right.foldable =>
+        t.copy(right = Cast(right, t.left.dataType))
     }
   }
 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/date.sql b/sql/core/src/test/resources/sql-tests/inputs/date.sql
index bfa96ed1e..57049eb 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/date.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/date.sql
@@ -120,9 +120,11 @@ select null - date '2019-10-06';
 select date_str - date '2001-09-28' from date_view;
 select date '2001-09-28' - date_str from date_view;
 
--- invalid: date + string/null literal
+-- invalid: date + string literal
 select date'2011-11-11' + '1';
 select '1' + date'2011-11-11';
+
+-- null result: date + null
 select date'2011-11-11' + null;
 select null + date'2011-11-11';
 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql
index fc8416f..f8a3b3c 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql
@@ -108,9 +108,11 @@ create temporary view ts_view as select '2011-11-11 11:11:11' str;
 select str - timestamp'2011-11-11 11:11:11' from ts_view;
 select timestamp'2011-11-11 11:11:11' - str from ts_view;
 
--- invalid: timestamp + string/null literal
+-- invalid: timestamp + string literal
 select timestamp'2011-11-11 11:11:11' + '1';
 select '1' + timestamp'2011-11-11 11:11:11';
+
+-- null result: timestamp + null
 select timestamp'2011-11-11 11:11:11' + null;
 select null + timestamp'2011-11-11 11:11:11';
 
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out
index 46eaa68..0196b99 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out
@@ -331,8 +331,8 @@ select date_add('2011-11-11', '1.2')
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.sql.AnalysisException
-The second argument of 'date_add' function needs to be an integer.
+java.lang.NumberFormatException
+invalid input syntax for type numeric: 1.2
 
 
 -- !query
@@ -441,8 +441,8 @@ select date_sub(date'2011-11-11', '1.2')
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.sql.AnalysisException
-The second argument of 'date_sub' function needs to be an integer.
+java.lang.NumberFormatException
+invalid input syntax for type numeric: 1.2
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out
index a136615..948a790 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out
@@ -559,10 +559,9 @@ struct<(TIMESTAMP '2019-10-06 10:11:12.345678' - DATE '2020-01-01'):interval day
 -- !query
 select timestamp'2011-11-11 11:11:11' - '2011-11-11 11:11:10'
 -- !query schema
-struct<>
+struct<(TIMESTAMP '2011-11-11 11:11:11' - 2011-11-11 11:11:10):interval day to second>
 -- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve '(TIMESTAMP '2011-11-11 11:11:11' - '2011-11-11 11:11:10')' due to data type mismatch: argument 2 requires (timestamp or timestamp without time zone) type, however, ''2011-11-11 11:11:10'' is of string type.; line 1 pos 7
+0 00:00:01.000000000
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out
index 7c179c7..a355c2d 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out
@@ -559,10 +559,9 @@ struct<(TIMESTAMP_NTZ '2019-10-06 10:11:12.345678' - DATE '2020-01-01'):interval
 -- !query
 select timestamp'2011-11-11 11:11:11' - '2011-11-11 11:11:10'
 -- !query schema
-struct<>
+struct<(TIMESTAMP_NTZ '2011-11-11 11:11:11' - 2011-11-11 11:11:10):interval day to second>
 -- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve '(TIMESTAMP_NTZ '2011-11-11 11:11:11' - '2011-11-11 11:11:10')' due to data type mismatch: argument 2 requires (timestamp or timestamp without time zone) type, however, ''2011-11-11 11:11:10'' is of string type.; line 1 pos 7
+0 00:00:01.000000000
 
 
 -- !query
