You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2021/08/14 02:46:19 UTC

[spark] branch master updated: [SPARK-36508][SQL] ANSI type coercion: disallow binary operations between Interval and String literal

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new ecdea91  [SPARK-36508][SQL] ANSI type coercion: disallow binary operations between Interval and String literal
ecdea91 is described below

commit ecdea9160213fefb1936ef41f9968d394d109389
Author: Gengliang Wang <ge...@apache.org>
AuthorDate: Sat Aug 14 10:45:05 2021 +0800

    [SPARK-36508][SQL] ANSI type coercion: disallow binary operations between Interval and String literal
    
    ### What changes were proposed in this pull request?
    
    If a binary operation contains interval type and string literal, we can't decide which interval type the string literal should be promoted as. There are many possible interval types, such as year interval, month interval, day interval, hour interval, etc.
    The related binary operation for Interval contains
    - Add
    - Subtract
    - Comparisions
    
    Note that `Interval Multiple/Divide StringLiteral` is valid as them is not binary operators(the left and right are not of the same type). This PR also add tests for them.
    
    ### Why are the changes needed?
    
    Avoid ambiguously implicit casting string literals to interval types.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, the ANSI type coercion is not released yet.
    
    ### How was this patch tested?
    
    New tests.
    
    Closes #33737 from gengliangwang/disallowStringAndInterval.
    
    Authored-by: Gengliang Wang <ge...@apache.org>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 .../sql/catalyst/analysis/AnsiTypeCoercion.scala   |  16 +-
 .../test/resources/sql-tests/inputs/interval.sql   |  27 +++
 .../sql-tests/results/ansi/interval.sql.out        | 189 ++++++++++++++++++++-
 .../resources/sql-tests/results/interval.sql.out   | 164 +++++++++++++++++-
 4 files changed, 386 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index 61724ae..f6d3ac2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -270,14 +270,26 @@ object AnsiTypeCoercion extends TypeCoercionBase {
       }
     }
 
+    // Return whether a string literal can be promoted as the give data type in a binary operation.
+    private def canPromoteAsInBinaryOperation(dt: DataType) = dt match {
+      // If a binary operation contains interval type and string literal, we can't decide which
+      // interval type the string literal should be promoted as. There are many possible interval
+      // types, such as year interval, month interval, day interval, hour interval, etc.
+      case _: YearMonthIntervalType | _: DayTimeIntervalType => false
+      case _: AtomicType => true
+      case _ => false
+    }
+
     override def transform: PartialFunction[Expression, Expression] = {
       // Skip nodes who's children have not been resolved yet.
       case e if !e.childrenResolved => e
 
-      case b @ BinaryOperator(left @ StringType(), right @ AtomicType()) if left.foldable =>
+      case b @ BinaryOperator(left @ StringType(), right)
+        if left.foldable && canPromoteAsInBinaryOperation(right.dataType) =>
         b.makeCopy(Array(castExpr(left, right.dataType), right))
 
-      case b @ BinaryOperator(left @ AtomicType(), right @ StringType()) if right.foldable =>
+      case b @ BinaryOperator(left, right @ StringType())
+        if right.foldable && canPromoteAsInBinaryOperation(left.dataType) =>
         b.makeCopy(Array(left, castExpr(right, left.dataType)))
 
       case Abs(e @ StringType(), failOnError) if e.foldable => Abs(Cast(e, DoubleType), failOnError)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index be13a25..618cf16 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -11,6 +11,25 @@ select interval 2147483647 month / 0.5;
 select interval 2147483647 day * 2;
 select interval 2147483647 day / 0.5;
 
+-- multiply and divide an interval by a string literal
+select interval 2 second * '2';
+select interval 2 second / '2';
+select interval 2 year * '2';
+select interval 2 year / '2';
+select interval 2 second * 'a';
+select interval 2 second / 'a';
+select interval 2 year * 'a';
+select interval 2 year / 'a';
+
+select '2' * interval 2 second;
+select '2' * interval 2 year;
+select 'a' * interval 2 second;
+select 'a' * interval 2 year;
+
+-- invalid: string literal / interval
+select '2' / interval 2 second;
+select '2' / interval 2 year;
+
 -- interval operation with null and zero case
 select interval '2 seconds' / 0;
 select interval '2 seconds' / null;
@@ -318,6 +337,14 @@ SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH;
 SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH;
 SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS;
 
+-- compare interval with string
+SELECT INTERVAL '1' DAY < '1';
+SELECT INTERVAL '1' DAY = '1';
+SELECT INTERVAL '1' DAY > '1';
+SELECT INTERVAL '1' YEAR < '1';
+SELECT INTERVAL '1' YEAR = '1';
+SELECT INTERVAL '1' YEAR > '1';
+
 SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
 SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
 SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 4e784b0..e0bf076 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 231
+-- Number of queries: 251
 
 
 -- !query
@@ -85,6 +85,126 @@ long overflow
 
 
 -- !query
+select interval 2 second * '2'
+-- !query schema
+struct<(INTERVAL '02' SECOND * 2):interval day to second>
+-- !query output
+0 00:00:04.000000000
+
+
+-- !query
+select interval 2 second / '2'
+-- !query schema
+struct<(INTERVAL '02' SECOND / 2):interval day to second>
+-- !query output
+0 00:00:01.000000000
+
+
+-- !query
+select interval 2 year * '2'
+-- !query schema
+struct<(INTERVAL '2' YEAR * 2):interval year to month>
+-- !query output
+4-0
+
+
+-- !query
+select interval 2 year / '2'
+-- !query schema
+struct<(INTERVAL '2' YEAR / 2):interval year to month>
+-- !query output
+1-0
+
+
+-- !query
+select interval 2 second * 'a'
+-- !query schema
+struct<>
+-- !query output
+java.lang.NumberFormatException
+invalid input syntax for type numeric: a
+
+
+-- !query
+select interval 2 second / 'a'
+-- !query schema
+struct<>
+-- !query output
+java.lang.NumberFormatException
+invalid input syntax for type numeric: a
+
+
+-- !query
+select interval 2 year * 'a'
+-- !query schema
+struct<>
+-- !query output
+java.lang.NumberFormatException
+invalid input syntax for type numeric: a
+
+
+-- !query
+select interval 2 year / 'a'
+-- !query schema
+struct<>
+-- !query output
+java.lang.NumberFormatException
+invalid input syntax for type numeric: a
+
+
+-- !query
+select '2' * interval 2 second
+-- !query schema
+struct<(INTERVAL '02' SECOND * 2):interval day to second>
+-- !query output
+0 00:00:04.000000000
+
+
+-- !query
+select '2' * interval 2 year
+-- !query schema
+struct<(INTERVAL '2' YEAR * 2):interval year to month>
+-- !query output
+4-0
+
+
+-- !query
+select 'a' * interval 2 second
+-- !query schema
+struct<>
+-- !query output
+java.lang.NumberFormatException
+invalid input syntax for type numeric: a
+
+
+-- !query
+select 'a' * interval 2 year
+-- !query schema
+struct<>
+-- !query output
+java.lang.NumberFormatException
+invalid input syntax for type numeric: a
+
+
+-- !query
+select '2' / interval 2 second
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('2' / INTERVAL '02' SECOND)' due to data type mismatch: differing types in '('2' / INTERVAL '02' SECOND)' (string and interval second).; line 1 pos 7
+
+
+-- !query
+select '2' / interval 2 year
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('2' / INTERVAL '2' YEAR)' due to data type mismatch: differing types in '('2' / INTERVAL '2' YEAR)' (string and interval year).; line 1 pos 7
+
+
+-- !query
 select interval '2 seconds' / 0
 -- !query schema
 struct<>
@@ -1229,8 +1349,8 @@ select
 -- !query schema
 struct<>
 -- !query output
-java.lang.IllegalArgumentException
-Interval string does not match year-month format of `[+|-]d`, `INTERVAL [+|-]'[+|-]d' DAY` when cast to interval year: 3-3 year to month
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '2' YEAR + '3-3 year to month')' due to data type mismatch: differing types in '(INTERVAL '2' YEAR + '3-3 year to month')' (interval year and string).; line 2 pos 2
 
 
 -- !query
@@ -1254,16 +1374,17 @@ select interval '2' year + '3-3'
 -- !query schema
 struct<>
 -- !query output
-java.lang.IllegalArgumentException
-Interval string does not match year-month format of `[+|-]d`, `INTERVAL [+|-]'[+|-]d' DAY` when cast to interval year: 3-3
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '2' YEAR + '3-3')' due to data type mismatch: differing types in '(INTERVAL '2' YEAR + '3-3')' (interval year and string).; line 1 pos 7
 
 
 -- !query
 select interval '2' year - '4'
 -- !query schema
-struct<(INTERVAL '2' YEAR - 4):interval year>
+struct<>
 -- !query output
--2-0
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '2' YEAR - '4')' due to data type mismatch: differing types in '(INTERVAL '2' YEAR - '4')' (interval year and string).; line 1 pos 7
 
 
 -- !query
@@ -2180,6 +2301,60 @@ cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type misma
 
 
 -- !query
+SELECT INTERVAL '1' DAY < '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' DAY < '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY < '1')' (interval day and string).; line 1 pos 7
+
+
+-- !query
+SELECT INTERVAL '1' DAY = '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' DAY = '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY = '1')' (interval day and string).; line 1 pos 7
+
+
+-- !query
+SELECT INTERVAL '1' DAY > '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' DAY > '1')' due to data type mismatch: differing types in '(INTERVAL '1' DAY > '1')' (interval day and string).; line 1 pos 7
+
+
+-- !query
+SELECT INTERVAL '1' YEAR < '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' YEAR < '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR < '1')' (interval year and string).; line 1 pos 7
+
+
+-- !query
+SELECT INTERVAL '1' YEAR = '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' YEAR = '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR = '1')' (interval year and string).; line 1 pos 7
+
+
+-- !query
+SELECT INTERVAL '1' YEAR > '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' YEAR > '1')' due to data type mismatch: differing types in '(INTERVAL '1' YEAR > '1')' (interval year and string).; line 1 pos 7
+
+
+-- !query
 SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
 -- !query schema
 struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to month>>
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index a6e2599..3e6380b 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 231
+-- Number of queries: 251
 
 
 -- !query
@@ -85,6 +85,120 @@ long overflow
 
 
 -- !query
+select interval 2 second * '2'
+-- !query schema
+struct<(INTERVAL '02' SECOND * 2):interval day to second>
+-- !query output
+0 00:00:04.000000000
+
+
+-- !query
+select interval 2 second / '2'
+-- !query schema
+struct<(INTERVAL '02' SECOND / 2):interval day to second>
+-- !query output
+0 00:00:01.000000000
+
+
+-- !query
+select interval 2 year * '2'
+-- !query schema
+struct<(INTERVAL '2' YEAR * 2):interval year to month>
+-- !query output
+4-0
+
+
+-- !query
+select interval 2 year / '2'
+-- !query schema
+struct<(INTERVAL '2' YEAR / 2):interval year to month>
+-- !query output
+1-0
+
+
+-- !query
+select interval 2 second * 'a'
+-- !query schema
+struct<(INTERVAL '02' SECOND * a):interval day to second>
+-- !query output
+NULL
+
+
+-- !query
+select interval 2 second / 'a'
+-- !query schema
+struct<(INTERVAL '02' SECOND / a):interval day to second>
+-- !query output
+NULL
+
+
+-- !query
+select interval 2 year * 'a'
+-- !query schema
+struct<(INTERVAL '2' YEAR * a):interval year to month>
+-- !query output
+NULL
+
+
+-- !query
+select interval 2 year / 'a'
+-- !query schema
+struct<(INTERVAL '2' YEAR / a):interval year to month>
+-- !query output
+NULL
+
+
+-- !query
+select '2' * interval 2 second
+-- !query schema
+struct<(INTERVAL '02' SECOND * 2):interval day to second>
+-- !query output
+0 00:00:04.000000000
+
+
+-- !query
+select '2' * interval 2 year
+-- !query schema
+struct<(INTERVAL '2' YEAR * 2):interval year to month>
+-- !query output
+4-0
+
+
+-- !query
+select 'a' * interval 2 second
+-- !query schema
+struct<(INTERVAL '02' SECOND * a):interval day to second>
+-- !query output
+NULL
+
+
+-- !query
+select 'a' * interval 2 year
+-- !query schema
+struct<(INTERVAL '2' YEAR * a):interval year to month>
+-- !query output
+NULL
+
+
+-- !query
+select '2' / interval 2 second
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(CAST('2' AS DOUBLE) / INTERVAL '02' SECOND)' due to data type mismatch: differing types in '(CAST('2' AS DOUBLE) / INTERVAL '02' SECOND)' (double and interval second).; line 1 pos 7
+
+
+-- !query
+select '2' / interval 2 year
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(CAST('2' AS DOUBLE) / INTERVAL '2' YEAR)' due to data type mismatch: differing types in '(CAST('2' AS DOUBLE) / INTERVAL '2' YEAR)' (double and interval year).; line 1 pos 7
+
+
+-- !query
 select interval '2 seconds' / 0
 -- !query schema
 struct<>
@@ -2176,6 +2290,54 @@ cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type misma
 
 
 -- !query
+SELECT INTERVAL '1' DAY < '1'
+-- !query schema
+struct<(INTERVAL '1' DAY < 1):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '1' DAY = '1'
+-- !query schema
+struct<(INTERVAL '1' DAY = 1):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL '1' DAY > '1'
+-- !query schema
+struct<(INTERVAL '1' DAY > 1):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '1' YEAR < '1'
+-- !query schema
+struct<(INTERVAL '1' YEAR < 1):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '1' YEAR = '1'
+-- !query schema
+struct<(INTERVAL '1' YEAR = 1):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL '1' YEAR > '1'
+-- !query schema
+struct<(INTERVAL '1' YEAR > 1):boolean>
+-- !query output
+false
+
+
+-- !query
 SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
 -- !query schema
 struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to month>>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org