You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2022/06/13 10:04:19 UTC

[spark] branch master updated: [SPARK-39451][SQL] Support casting intervals to integrals in ANSI mode

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a4f96afdc14 [SPARK-39451][SQL] Support casting intervals to integrals in ANSI mode
a4f96afdc14 is described below

commit a4f96afdc147bfee3e0f195b7bcf3dfa882ad511
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Mon Jun 13 13:04:01 2022 +0300

    [SPARK-39451][SQL] Support casting intervals to integrals in ANSI mode
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to support casting of interval types to the integral types: `TINYINT`, `SMALLINT`, `INT`, `BIGINT`.
    
    ### Why are the changes needed?
    To conform to the SQL standard, which allows such casting:
    <img width="801" alt="Screenshot 2022-06-12 at 13 04 44" src="https://user-images.githubusercontent.com/1580697/173228149-17e1fbaa-c095-4eb7-bb3b-81a3f9c91928.png">
    
    ### Does this PR introduce _any_ user-facing change?
    No, it extends existing behavior.
    
    ### How was this patch tested?
    By running new tests:
    ```
    $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z cast.sql"
    ```
    
    Closes #36811 from MaxGekk/cast-interval-to-int.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 .../spark/sql/catalyst/expressions/Cast.scala      |  5 +-
 .../src/test/resources/sql-tests/inputs/cast.sql   | 12 ++++
 .../resources/sql-tests/results/ansi/cast.sql.out  | 77 +++++++++++++++++++++-
 .../test/resources/sql-tests/results/cast.sql.out  | 77 +++++++++++++++++++++-
 4 files changed, 167 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 497261be2e4..0746bc0fcd0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -108,8 +108,9 @@ object Cast {
     case (TimestampType, TimestampNTZType) => true
 
     case (StringType, _: CalendarIntervalType) => true
-    case (StringType, _: DayTimeIntervalType) => true
-    case (StringType, _: YearMonthIntervalType) => true
+    case (StringType, _: AnsiIntervalType) => true
+
+    case (_: AnsiIntervalType, _: IntegralType) => true
 
     case (_: DayTimeIntervalType, _: DayTimeIntervalType) => true
     case (_: YearMonthIntervalType, _: YearMonthIntervalType) => true
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql
index 4610716902e..5198611a2b3 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql
@@ -104,3 +104,15 @@ select cast('a' as timestamp_ntz);
 
 select cast(cast('inf' as double) as timestamp);
 select cast(cast('inf' as float) as timestamp);
+
+-- cast ANSI intervals to numerics
+select cast(interval '1' year as tinyint);
+select cast(interval '-10-2' year to month as smallint);
+select cast(interval '1000' month as int);
+select cast(interval -'10.123456' second as tinyint);
+select cast(interval '23:59:59' hour to second as smallint);
+select cast(interval -'1 02:03:04.123' day to second as int);
+select cast(interval '10' day as bigint);
+
+select cast(interval '-1000' month as tinyint);
+select cast(interval '1000000' second as smallint);
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out
index f1c2dbd3d7d..b05a85d2927 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 72
+-- Number of queries: 81
 
 
 -- !query
@@ -766,3 +766,78 @@ org.apache.spark.SparkDateTimeException
 == SQL(line 1, position 8) ==
 select cast(cast('inf' as float) as timestamp)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+-- !query
+select cast(interval '1' year as tinyint)
+-- !query schema
+struct<CAST(INTERVAL '1' YEAR AS TINYINT):tinyint>
+-- !query output
+1
+
+
+-- !query
+select cast(interval '-10-2' year to month as smallint)
+-- !query schema
+struct<CAST(INTERVAL '-10-2' YEAR TO MONTH AS SMALLINT):smallint>
+-- !query output
+-122
+
+
+-- !query
+select cast(interval '1000' month as int)
+-- !query schema
+struct<CAST(INTERVAL '1000' MONTH AS INT):int>
+-- !query output
+1000
+
+
+-- !query
+select cast(interval -'10.123456' second as tinyint)
+-- !query schema
+struct<CAST(INTERVAL '-10.123456' SECOND AS TINYINT):tinyint>
+-- !query output
+-10
+
+
+-- !query
+select cast(interval '23:59:59' hour to second as smallint)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CAST_OVERFLOW] The value INTERVAL '23:59:59' HOUR TO SECOND of the type "INTERVAL HOUR TO SECOND" cannot be cast to "SMALLINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+
+
+-- !query
+select cast(interval -'1 02:03:04.123' day to second as int)
+-- !query schema
+struct<CAST(INTERVAL '-1 02:03:04.123' DAY TO SECOND AS INT):int>
+-- !query output
+-93784
+
+
+-- !query
+select cast(interval '10' day as bigint)
+-- !query schema
+struct<CAST(INTERVAL '10' DAY AS BIGINT):bigint>
+-- !query output
+10
+
+
+-- !query
+select cast(interval '-1000' month as tinyint)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CAST_OVERFLOW] The value INTERVAL '-1000' MONTH of the type "INTERVAL MONTH" cannot be cast to "TINYINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+
+
+-- !query
+select cast(interval '1000000' second as smallint)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CAST_OVERFLOW] The value INTERVAL '1000000' SECOND of the type "INTERVAL SECOND" cannot be cast to "SMALLINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out
index aaa82e43513..dbb32a5ed31 100644
--- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 72
+-- Number of queries: 81
 
 
 -- !query
@@ -594,3 +594,78 @@ select cast(cast('inf' as float) as timestamp)
 struct<CAST(CAST(inf AS FLOAT) AS TIMESTAMP):timestamp>
 -- !query output
 NULL
+
+
+-- !query
+select cast(interval '1' year as tinyint)
+-- !query schema
+struct<CAST(INTERVAL '1' YEAR AS TINYINT):tinyint>
+-- !query output
+1
+
+
+-- !query
+select cast(interval '-10-2' year to month as smallint)
+-- !query schema
+struct<CAST(INTERVAL '-10-2' YEAR TO MONTH AS SMALLINT):smallint>
+-- !query output
+-122
+
+
+-- !query
+select cast(interval '1000' month as int)
+-- !query schema
+struct<CAST(INTERVAL '1000' MONTH AS INT):int>
+-- !query output
+1000
+
+
+-- !query
+select cast(interval -'10.123456' second as tinyint)
+-- !query schema
+struct<CAST(INTERVAL '-10.123456' SECOND AS TINYINT):tinyint>
+-- !query output
+-10
+
+
+-- !query
+select cast(interval '23:59:59' hour to second as smallint)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CAST_OVERFLOW] The value INTERVAL '23:59:59' HOUR TO SECOND of the type "INTERVAL HOUR TO SECOND" cannot be cast to "SMALLINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+
+
+-- !query
+select cast(interval -'1 02:03:04.123' day to second as int)
+-- !query schema
+struct<CAST(INTERVAL '-1 02:03:04.123' DAY TO SECOND AS INT):int>
+-- !query output
+-93784
+
+
+-- !query
+select cast(interval '10' day as bigint)
+-- !query schema
+struct<CAST(INTERVAL '10' DAY AS BIGINT):bigint>
+-- !query output
+10
+
+
+-- !query
+select cast(interval '-1000' month as tinyint)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CAST_OVERFLOW] The value INTERVAL '-1000' MONTH of the type "INTERVAL MONTH" cannot be cast to "TINYINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
+
+
+-- !query
+select cast(interval '1000000' second as smallint)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkArithmeticException
+[CAST_OVERFLOW] The value INTERVAL '1000000' SECOND of the type "INTERVAL SECOND" cannot be cast to "SMALLINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org