You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/03/10 14:28:40 UTC
[spark] branch branch-3.0 updated: [SPARK-30189][SQL] Interval from
year-month/date-time string should handle whitespaces
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 558a82c [SPARK-30189][SQL] Interval from year-month/date-time string should handle whitespaces
558a82c is described below
commit 558a82cbd4ee6d463ed5c4460b327340511aa6d4
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Tue Mar 10 22:08:58 2020 +0800
[SPARK-30189][SQL] Interval from year-month/date-time string should handle whitespaces
### What changes were proposed in this pull request?
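Currently, we parse intervals either from multi-unit strings or from date-time/year-month pattern strings. The former handles all whitespace, while the latter does not handle any whitespace, not even plain spaces. This PR trims leading and trailing whitespace from the input before matching the date-time/year-month patterns.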
### Why are the changes needed?
For behavior consistency between the two interval string parsing paths.
### Does this PR introduce any user-facing change?
Yes, an interval string in date-time/year-month format like
```
select interval '\n-\t10\t 12:34:46.789\t' day to second
-- !query 126 schema
struct<INTERVAL '-10 days -12 hours -34 minutes -46.789 seconds':interval>
-- !query 126 output
-10 days -12 hours -34 minutes -46.789 seconds
```
is valid now.
### How was this patch tested?
Added unit tests.
Closes #26815 from yaooqinn/SPARK-30189.
Authored-by: Kent Yao <ya...@hotmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 3bd6ebff81a46c3bf3664c4be1714c3002d92e85)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/catalyst/util/IntervalUtils.scala | 5 +--
.../catalyst/parser/ExpressionParserSuite.scala | 5 ++-
.../sql/catalyst/util/IntervalUtilsSuite.scala | 14 +++++++
.../test/resources/sql-tests/inputs/interval.sql | 4 ++
.../sql-tests/results/ansi/interval.sql.out | 47 +++++++++++++++++++++-
.../resources/sql-tests/results/interval.sql.out | 47 +++++++++++++++++++++-
6 files changed, 115 insertions(+), 7 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index 0a13ec8..ccf8c5e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -136,8 +136,7 @@ object IntervalUtils {
s"Error parsing interval year-month string: ${e.getMessage}", e)
}
}
- assert(input.length == input.trim.length)
- input match {
+ input.trim match {
case yearMonthPattern("-", yearStr, monthStr) =>
negateExact(toInterval(yearStr, monthStr))
case yearMonthPattern(_, yearStr, monthStr) =>
@@ -300,7 +299,7 @@ object IntervalUtils {
val regexp = dayTimePattern.get(from -> to)
require(regexp.isDefined, s"Cannot support (interval '$input' $from to $to) expression")
val pattern = regexp.get.pattern
- val m = pattern.matcher(input)
+ val m = pattern.matcher(input.trim)
require(m.matches, s"Interval string must match day-time format of '$pattern': $input, " +
s"$fallbackNotice")
var micros: Long = 0L
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index e8beb61..74fd48d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -694,7 +694,7 @@ class ExpressionParserSuite extends AnalysisTest {
intercept("interval 10 nanoseconds", "invalid unit 'nanoseconds'")
// Year-Month intervals.
- val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0")
+ val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0", "\t -1-2\t")
yearMonthValues.foreach { value =>
val result = Literal(IntervalUtils.fromYearMonthString(value))
checkIntervals(s"'$value' year to month", result)
@@ -707,7 +707,8 @@ class ExpressionParserSuite extends AnalysisTest {
"10 9:8:7.123456789",
"1 0:0:0",
"-1 0:0:0",
- "1 0:0:1")
+ "1 0:0:1",
+ "\t 1 0:0:1 ")
datTimeValues.foreach { value =>
val result = Literal(IntervalUtils.fromDayTimeString(value))
checkIntervals(s"'$value' day to second", result)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
index 1628a61..3d9372c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala
@@ -137,6 +137,15 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
failFuncWithInvalidInput("99-15", "month 15 outside range", fromYearMonthString)
failFuncWithInvalidInput("9a9-15", "Interval string does not match year-month format",
fromYearMonthString)
+
+ // whitespaces
+ assert(fromYearMonthString("99-10 ") === new CalendarInterval(99 * 12 + 10, 0, 0L))
+ assert(fromYearMonthString("+99-10\t") === new CalendarInterval(99 * 12 + 10, 0, 0L))
+ assert(fromYearMonthString("\t\t-8-10\t") === new CalendarInterval(-8 * 12 - 10, 0, 0L))
+ failFuncWithInvalidInput("99\t-15", "Interval string does not match year-month format",
+ fromYearMonthString)
+ failFuncWithInvalidInput("-\t99-15", "Interval string does not match year-month format",
+ fromYearMonthString)
}
test("from day-time string - legacy") {
@@ -312,6 +321,11 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper {
checkFail("5 30:12:20", DAY, SECOND, "hour 30 outside range")
checkFail("5 30-12", DAY, SECOND, "must match day-time format")
checkFail("5 1:12:20", HOUR, MICROSECOND, "Cannot support (interval")
+
+ // whitespaces
+ check("\t +5 12:40\t ", DAY, MINUTE, "5 days 12 hours 40 minutes")
+ checkFail("+5\t 12:40", DAY, MINUTE, "must match day-time format")
+
}
test("interval overflow check") {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 4f26e75..fec11b4 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -153,6 +153,10 @@ select interval 'interval \t 1\tday';
select interval 'interval\t1\tday';
select interval '1\t' day;
select interval '1 ' day;
+select interval '2-2\t' year to month;
+select interval '-\t2-2\t' year to month;
+select interval '\n0 12:34:46.789\t' day to second;
+select interval '\n-\t10\t 12:34:46.789\t' day to second;
-- interval overflow if (ansi) exception else NULL
select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b);
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 4a41dd6..d4238c7 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 85
+-- Number of queries: 89
-- !query
@@ -805,6 +805,51 @@ struct<INTERVAL '1 days':interval>
-- !query
+select interval '2-2\t' year to month
+-- !query schema
+struct<INTERVAL '2 years 2 months':interval>
+-- !query output
+2 years 2 months
+
+
+-- !query
+select interval '-\t2-2\t' year to month
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Interval string does not match year-month format of 'y-m': - 2-2 (line 1, pos 16)
+
+== SQL ==
+select interval '-\t2-2\t' year to month
+----------------^^^
+
+
+-- !query
+select interval '\n0 12:34:46.789\t' day to second
+-- !query schema
+struct<INTERVAL '12 hours 34 minutes 46.789 seconds':interval>
+-- !query output
+12 hours 34 minutes 46.789 seconds
+
+
+-- !query
+select interval '\n-\t10\t 12:34:46.789\t' day to second
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+requirement failed: Interval string must match day-time format of '^(?<sign>[+|-])?(?<day>\d+) (?<hour>\d{1,2}):(?<minute>\d{1,2}):(?<second>(\d{1,2})(\.(\d{1,9}))?)$':
+- 10 12:34:46.789 , set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16)
+
+== SQL ==
+select interval '\n-\t10\t 12:34:46.789\t' day to second
+----------------^^^
+
+
+-- !query
select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
-- !query schema
struct<>
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index f1af335..7a3dd74 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 85
+-- Number of queries: 89
-- !query
@@ -784,6 +784,51 @@ struct<INTERVAL '1 days':interval>
-- !query
+select interval '2-2\t' year to month
+-- !query schema
+struct<INTERVAL '2 years 2 months':interval>
+-- !query output
+2 years 2 months
+
+
+-- !query
+select interval '-\t2-2\t' year to month
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Interval string does not match year-month format of 'y-m': - 2-2 (line 1, pos 16)
+
+== SQL ==
+select interval '-\t2-2\t' year to month
+----------------^^^
+
+
+-- !query
+select interval '\n0 12:34:46.789\t' day to second
+-- !query schema
+struct<INTERVAL '12 hours 34 minutes 46.789 seconds':interval>
+-- !query output
+12 hours 34 minutes 46.789 seconds
+
+
+-- !query
+select interval '\n-\t10\t 12:34:46.789\t' day to second
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+requirement failed: Interval string must match day-time format of '^(?<sign>[+|-])?(?<day>\d+) (?<hour>\d{1,2}):(?<minute>\d{1,2}):(?<second>(\d{1,2})(\.(\d{1,9}))?)$':
+- 10 12:34:46.789 , set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16)
+
+== SQL ==
+select interval '\n-\t10\t 12:34:46.789\t' day to second
+----------------^^^
+
+
+-- !query
select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b)
-- !query schema
struct<(- a):interval>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org