You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/03/10 14:28:40 UTC

[spark] branch branch-3.0 updated: [SPARK-30189][SQL] Interval from year-month/date-time string should handle whitespaces

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 558a82c  [SPARK-30189][SQL] Interval from year-month/date-time string should handle whitespaces
558a82c is described below

commit 558a82cbd4ee6d463ed5c4460b327340511aa6d4
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Tue Mar 10 22:08:58 2020 +0800

    [SPARK-30189][SQL] Interval from year-month/date-time string should handle whitespaces
    
    ### What changes were proposed in this pull request?
    
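    Currently, we parse intervals either from multi-unit strings or from date-time/year-month pattern strings. The former handles all leading and trailing whitespace; the latter does not handle any whitespace, not even spaces. This change trims the input string before matching it against the date-time and year-month patterns.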
    
    ### Why are the changes needed?
    
    Behavior consistency between the two interval string parsing paths.
    
    ### Does this PR introduce any user-facing change?
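    Yes. Interval strings in date-time/year-month format with leading or trailing whitespace, such as
    ```
    select interval '\n0 12:34:46.789\t' day to second
    -- !query schema
    struct<INTERVAL '12 hours 34 minutes 46.789 seconds':interval>
    -- !query output
    12 hours 34 minutes 46.789 seconds
    ```
    are valid now. Whitespace inside the value itself (e.g. '\n-\t10\t 12:34:46.789\t', with tabs between the sign, the day and the time) is still rejected with a ParseException.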
    
    ### How was this patch tested?
    
    Added unit tests and SQL query tests.
    
    Closes #26815 from yaooqinn/SPARK-30189.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 3bd6ebff81a46c3bf3664c4be1714c3002d92e85)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/catalyst/util/IntervalUtils.scala    |  5 +--
 .../catalyst/parser/ExpressionParserSuite.scala    |  5 ++-
 .../sql/catalyst/util/IntervalUtilsSuite.scala     | 14 +++++++
 .../test/resources/sql-tests/inputs/interval.sql   |  4 ++
 .../sql-tests/results/ansi/interval.sql.out        | 47 +++++++++++++++++++++-
 .../resources/sql-tests/results/interval.sql.out   | 47 +++++++++++++++++++++-
 6 files changed, 115 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index 0a13ec8..ccf8c5e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -136,8 +136,7 @@ object IntervalUtils {
             s"Error parsing interval year-month string: ${e.getMessage}", e)
       }
     }
-    assert(input.length == input.trim.length)
-    input match {
+    input.trim match {
       case yearMonthPattern("-", yearStr, monthStr) =>
         negateExact(toInterval(yearStr, monthStr))
       case yearMonthPattern(_, yearStr, monthStr) =>
@@ -300,7 +299,7 @@ object IntervalUtils {
     val regexp = dayTimePattern.get(from -> to)
     require(regexp.isDefined, s"Cannot support (interval '$input' $from to $to) expression")
     val pattern = regexp.get.pattern
-    val m = pattern.matcher(input)
+    val m = pattern.matcher(input.trim)
     require(m.matches, s"Interval string must match day-time format of '$pattern': $input, " +
       s"$fallbackNotice")
     var micros: Long = 0L
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index e8beb61..74fd48d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -694,7 +694,7 @@ class ExpressionParserSuite extends AnalysisTest {
     intercept("interval 10 nanoseconds", "invalid unit 'nanoseconds'")
 
     // Year-Month intervals.
-    val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0")
+    val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0", "\t -1-2\t")
     yearMonthValues.foreach { value =>
       val result = Literal(IntervalUtils.fromYearMonthString(value))
       checkIntervals(s"'$value' year to month", result)
@@ -707,7 +707,8 @@ class ExpressionParserSuite extends AnalysisTest {
       "10 9:8:7.123456789",
       "1 0:0:0",
       "-1 0:0:0",
-      "1 0:0:1")
+      "1 0:0:1",
+      "\t 1 0:0:1 ")
     datTimeValues.foreach { value =>
       val result = Literal(IntervalUtils.fromDayTimeString(value))
       checkIntervals(s"'$value' day to second", result)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
index 1628a61..3d9372c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
@@ -137,6 +137,15 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
     failFuncWithInvalidInput("99-15", "month 15 outside range", fromYearMonthString)
     failFuncWithInvalidInput("9a9-15", "Interval string does not match year-month format",
       fromYearMonthString)
+
+    // whitespaces
+    assert(fromYearMonthString("99-10 ") === new CalendarInterval(99 * 12 + 10, 0, 0L))
+    assert(fromYearMonthString("+99-10\t") === new CalendarInterval(99 * 12 + 10, 0, 0L))
+    assert(fromYearMonthString("\t\t-8-10\t") === new CalendarInterval(-8 * 12 - 10, 0, 0L))
+    failFuncWithInvalidInput("99\t-15", "Interval string does not match year-month format",
+      fromYearMonthString)
+    failFuncWithInvalidInput("-\t99-15", "Interval string does not match year-month format",
+      fromYearMonthString)
   }
 
   test("from day-time string - legacy") {
@@ -312,6 +321,11 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
     checkFail("5 30:12:20", DAY, SECOND, "hour 30 outside range")
     checkFail("5 30-12", DAY, SECOND, "must match day-time format")
     checkFail("5 1:12:20", HOUR, MICROSECOND, "Cannot support (interval")
+
+    // whitespaces
+    check("\t +5 12:40\t ", DAY, MINUTE, "5 days 12 hours 40 minutes")
+    checkFail("+5\t 12:40", DAY, MINUTE, "must match day-time format")
+
   }
 
   test("interval overflow check") {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 4f26e75..fec11b4 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -153,6 +153,10 @@ select interval 'interval \t 1\tday';
 select interval 'interval\t1\tday';
 select interval '1\t' day;
 select interval '1 ' day;
+select interval '2-2\t' year to month;
+select interval '-\t2-2\t' year to month;
+select interval '\n0 12:34:46.789\t' day to second;
+select interval '\n-\t10\t 12:34:46.789\t' day to second;
 
 -- interval overflow if (ansi) exception else NULL
 select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b);
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 4a41dd6..d4238c7 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 85
+-- Number of queries: 89
 
 
 -- !query
@@ -805,6 +805,51 @@ struct<INTERVAL '1 days':interval>
 
 
 -- !query
+select interval '2-2\t' year to month
+-- !query schema
+struct<INTERVAL '2 years 2 months':interval>
+-- !query output
+2 years 2 months
+
+
+-- !query
+select interval '-\t2-2\t' year to month
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Interval string does not match year-month format of 'y-m': -	2-2	(line 1, pos 16)
+
+== SQL ==
+select interval '-\t2-2\t' year to month
+----------------^^^
+
+
+-- !query
+select interval '\n0 12:34:46.789\t' day to second
+-- !query schema
+struct<INTERVAL '12 hours 34 minutes 46.789 seconds':interval>
+-- !query output
+12 hours 34 minutes 46.789 seconds
+
+
+-- !query
+select interval '\n-\t10\t 12:34:46.789\t' day to second
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+requirement failed: Interval string must match day-time format of '^(?<sign>[+|-])?(?<day>\d+) (?<hour>\d{1,2}):(?<minute>\d{1,2}):(?<second>(\d{1,2})(\.(\d{1,9}))?)$': 
+-	10	 12:34:46.789	, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16)
+
+== SQL ==
+select interval '\n-\t10\t 12:34:46.789\t' day to second
+----------------^^^
+
+
+-- !query
 select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
 -- !query schema
 struct<>
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index f1af335..7a3dd74 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 85
+-- Number of queries: 89
 
 
 -- !query
@@ -784,6 +784,51 @@ struct<INTERVAL '1 days':interval>
 
 
 -- !query
+select interval '2-2\t' year to month
+-- !query schema
+struct<INTERVAL '2 years 2 months':interval>
+-- !query output
+2 years 2 months
+
+
+-- !query
+select interval '-\t2-2\t' year to month
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Interval string does not match year-month format of 'y-m': -	2-2	(line 1, pos 16)
+
+== SQL ==
+select interval '-\t2-2\t' year to month
+----------------^^^
+
+
+-- !query
+select interval '\n0 12:34:46.789\t' day to second
+-- !query schema
+struct<INTERVAL '12 hours 34 minutes 46.789 seconds':interval>
+-- !query output
+12 hours 34 minutes 46.789 seconds
+
+
+-- !query
+select interval '\n-\t10\t 12:34:46.789\t' day to second
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+requirement failed: Interval string must match day-time format of '^(?<sign>[+|-])?(?<day>\d+) (?<hour>\d{1,2}):(?<minute>\d{1,2}):(?<second>(\d{1,2})(\.(\d{1,9}))?)$': 
+-	10	 12:34:46.789	, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16)
+
+== SQL ==
+select interval '\n-\t10\t 12:34:46.789\t' day to second
+----------------^^^
+
+
+-- !query
 select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
 -- !query schema
 struct<(- a):interval>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org