You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/09/08 06:32:35 UTC

[spark] branch branch-3.0 updated: [SPARK-32785][SQL][3.0] Interval with dangling parts should not result null

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 3b32ddf  [SPARK-32785][SQL][3.0] Interval with dangling parts should not result null
3b32ddf is described below

commit 3b32ddfd844ffb53e1dc1bed5a90b4d5750a4e45
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Tue Sep 8 06:30:11 2020 +0000

    [SPARK-32785][SQL][3.0] Interval with dangling parts should not result null
    
    THIS PR brings https://github.com/apache/spark/pull/29635 to branch-3.0 and targets v3.0.2
    
    ### What changes were proposed in this pull request?
    
    Bug fix for incomplete interval values, e.g. interval '1', interval '1 day 2'. Currently these cases result in null, but they should instead fail with IllegalArgumentException.
    
    ### Why are the changes needed?
    
    correctness
    
    ### Does this PR introduce _any_ user-facing change?
    
    yes, incomplete intervals will now throw an exception
    
    #### before
    ```
    bin/spark-sql -S -e "select interval '1', interval '+', interval '1 day -'"
    
    NULL NULL NULL
    ```
    #### after
    
    ```
    -- !query
    select interval '1'
    -- !query schema
    struct<>
    -- !query output
    org.apache.spark.sql.catalyst.parser.ParseException
    
    Cannot parse the INTERVAL value: 1(line 1, pos 7)
    
    == SQL ==
    select interval '1'
    ```
    
    ### How was this patch tested?
    
    unit tests added
    
    Closes #29658 from yaooqinn/SPARK-32785-30.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 docs/sql-migration-guide.md                        |   4 +
 .../spark/sql/catalyst/util/IntervalUtils.scala    |   5 +-
 .../sql/catalyst/util/IntervalUtilsSuite.scala     |  13 +++
 .../test/resources/sql-tests/inputs/interval.sql   |   8 ++
 .../sql-tests/results/ansi/interval.sql.out        | 100 ++++++++++++++++++++-
 .../resources/sql-tests/results/interval.sql.out   | 100 ++++++++++++++++++++-
 6 files changed, 227 insertions(+), 3 deletions(-)

diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 532ac70..e64c037 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -22,6 +22,10 @@ license: |
 * Table of contents
 {:toc}
 
+## Upgrading from Spark SQL 3.0.1 to 3.0.2
+
+  - In Spark 3.0.2, incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'` will fail with IllegalArgumentException. In Spark 3.0.1 and earlier, they result `NULL`s.
+
 ## Upgrading from Spark SQL 3.0 to 3.0.1
 
 - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index 833942d..af5061b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -735,7 +735,10 @@ object IntervalUtils {
     val result = state match {
       case UNIT_SUFFIX | UNIT_END | TRIM_BEFORE_SIGN =>
         new CalendarInterval(months, days, microseconds)
-      case _ => null
+      case TRIM_BEFORE_VALUE => throwIAE(s"expect a number after '$currentWord' but hit EOL")
+      case VALUE | VALUE_FRACTIONAL_PART =>
+        throwIAE(s"expect a unit name after '$currentWord' but hit EOL")
+      case _ => throwIAE(s"unknown error when parsing '$currentWord'")
     }
 
     result
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
index 3d9372c..1858378 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
@@ -77,6 +77,19 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
     }
   }
 
+  test("string to interval: interval with dangling parts should not results null") {
+    checkFromInvalidString("+", "expect a number after '+' but hit EOL")
+    checkFromInvalidString("-", "expect a number after '-' but hit EOL")
+    checkFromInvalidString("+ 2", "expect a unit name after '2' but hit EOL")
+    checkFromInvalidString("- 1", "expect a unit name after '1' but hit EOL")
+    checkFromInvalidString("1", "expect a unit name after '1' but hit EOL")
+    checkFromInvalidString("1.2", "expect a unit name after '1.2' but hit EOL")
+    checkFromInvalidString("1 day 2", "expect a unit name after '2' but hit EOL")
+    checkFromInvalidString("1 day 2.2", "expect a unit name after '2.2' but hit EOL")
+    checkFromInvalidString("1 day -", "expect a number after '-' but hit EOL")
+    checkFromInvalidString("-.", "expect a unit name after '-.' but hit EOL")
+  }
+
   test("string to interval: multiple units") {
     Seq(
       "-1 MONTH 1 day -1 microseconds" -> new CalendarInterval(-1, 1, -1),
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index ebc39f5..9ddefa5 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -180,3 +180,11 @@ SELECT
   to_json(from_json('{"a":"1 days"}', 'a interval')),
   to_json(map('a', interval 25 month 100 day 130 minute)),
   from_json(to_json(map('a', interval 25 month 100 day 130 minute)), 'a interval');
+
+select interval '+';
+select interval '+.';
+select interval '1';
+select interval '1.2';
+select interval '- 2';
+select interval '1 day -';
+select interval '1 day 1';
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 6a6a1df..eb84f3e 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 93
+-- Number of queries: 100
 
 
 -- !query
@@ -956,3 +956,101 @@ SELECT
 struct<from_json({"a":"1 days"}):struct<a:interval>,to_json(from_json({"a":"1 days"})):string,to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes')):string,from_json(to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes'))):struct<a:interval>>
 -- !query output
 {"a":1 days}	{"a":"1 days"}	{"a":"2 years 1 months 100 days 2 hours 10 minutes"}	{"a":2 years 1 months 100 days 2 hours 10 minutes}
+
+
+-- !query
+select interval '+'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: +(line 1, pos 7)
+
+== SQL ==
+select interval '+'
+-------^^^
+
+
+-- !query
+select interval '+.'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: +.(line 1, pos 7)
+
+== SQL ==
+select interval '+.'
+-------^^^
+
+
+-- !query
+select interval '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1(line 1, pos 7)
+
+== SQL ==
+select interval '1'
+-------^^^
+
+
+-- !query
+select interval '1.2'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1.2(line 1, pos 7)
+
+== SQL ==
+select interval '1.2'
+-------^^^
+
+
+-- !query
+select interval '- 2'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: - 2(line 1, pos 7)
+
+== SQL ==
+select interval '- 2'
+-------^^^
+
+
+-- !query
+select interval '1 day -'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1 day -(line 1, pos 7)
+
+== SQL ==
+select interval '1 day -'
+-------^^^
+
+
+-- !query
+select interval '1 day 1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7)
+
+== SQL ==
+select interval '1 day 1'
+-------^^^
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 63ed9b5..c5dd1e0 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 93
+-- Number of queries: 100
 
 
 -- !query
@@ -928,3 +928,101 @@ SELECT
 struct<from_json({"a":"1 days"}):struct<a:interval>,to_json(from_json({"a":"1 days"})):string,to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes')):string,from_json(to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes'))):struct<a:interval>>
 -- !query output
 {"a":1 days}	{"a":"1 days"}	{"a":"2 years 1 months 100 days 2 hours 10 minutes"}	{"a":2 years 1 months 100 days 2 hours 10 minutes}
+
+
+-- !query
+select interval '+'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: +(line 1, pos 7)
+
+== SQL ==
+select interval '+'
+-------^^^
+
+
+-- !query
+select interval '+.'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: +.(line 1, pos 7)
+
+== SQL ==
+select interval '+.'
+-------^^^
+
+
+-- !query
+select interval '1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1(line 1, pos 7)
+
+== SQL ==
+select interval '1'
+-------^^^
+
+
+-- !query
+select interval '1.2'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1.2(line 1, pos 7)
+
+== SQL ==
+select interval '1.2'
+-------^^^
+
+
+-- !query
+select interval '- 2'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: - 2(line 1, pos 7)
+
+== SQL ==
+select interval '- 2'
+-------^^^
+
+
+-- !query
+select interval '1 day -'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1 day -(line 1, pos 7)
+
+== SQL ==
+select interval '1 day -'
+-------^^^
+
+
+-- !query
+select interval '1 day 1'
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7)
+
+== SQL ==
+select interval '1 day 1'
+-------^^^


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org