You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/07/29 01:18:16 UTC
[spark] branch branch-3.2 updated: [SPARK-36286][SQL] Block some
invalid datetime string
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new fa521c1 [SPARK-36286][SQL] Block some invalid datetime string
fa521c1 is described below
commit fa521c15069a9731661df670e227a8c53c36be33
Author: Linhong Liu <li...@databricks.com>
AuthorDate: Thu Jul 29 09:16:46 2021 +0800
[SPARK-36286][SQL] Block some invalid datetime string
### What changes were proposed in this pull request?
In PR #32959, we found some weird datetime strings that can be parsed. ([details](https://github.com/apache/spark/pull/32959#discussion_r665015489))
This PR blocks the invalid datetime string.
### Why are the changes needed?
bug fix
### Does this PR introduce _any_ user-facing change?
Yes, below strings will have different results when cast to datetime.
```sql
select cast('12::' as timestamp); -- Before: 2021-07-07 12:00:00, After: NULL
select cast('T' as timestamp); -- Before: 2021-07-07 00:00:00, After: NULL
```
### How was this patch tested?
some new test cases
Closes #33490 from linhongliu-db/SPARK-35780-block-invalid-format.
Authored-by: Linhong Liu <li...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit ed0e351f05ac6edc132c3a630206b2031c419e1c)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 7 +++++--
.../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 4 ++++
.../org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala | 1 -
.../org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 8 +++++++-
4 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 0825a11..36d2b9b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -254,7 +254,9 @@ object DateTimeUtils {
val maxDigitsYear = 6
// For the nanosecond part, more than 6 digits is allowed, but will be truncated.
segment == 6 || (segment == 0 && digits >= 4 && digits <= maxDigitsYear) ||
- (segment != 0 && segment != 6 && digits <= 2)
+ // For the zoneId segment (7), it could be zero digits when it's a region-based zone ID
+ (segment == 7 && digits <= 2) ||
+ (segment != 0 && segment != 6 && segment != 7 && digits > 0 && digits <= 2)
}
if (s == null || s.trimAll().numBytes() == 0) {
return (Array.empty, None, false)
@@ -527,7 +529,8 @@ object DateTimeUtils {
def isValidDigits(segment: Int, digits: Int): Boolean = {
// An integer is able to represent a date within [+-]5 million years.
var maxDigitsYear = 7
- (segment == 0 && digits >= 4 && digits <= maxDigitsYear) || (segment != 0 && digits <= 2)
+ (segment == 0 && digits >= 4 && digits <= maxDigitsYear) ||
+ (segment != 0 && digits > 0 && digits <= 2)
}
if (s == null || s.trimAll().numBytes() == 0) {
return None
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 26270e6..4e247f5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -576,4 +576,8 @@ class CastSuite extends CastSuiteBase {
checkEvaluation(cast(invalidInput, TimestampNTZType), null)
}
}
+
+ test("SPARK-36286: invalid string cast to timestamp") {
+ checkEvaluation(cast(Literal("2015-03-18T"), TimestampType), null)
+ }
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index dcdc6f9..f01fea8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -150,7 +150,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
c.set(Calendar.MILLISECOND, 0)
checkCastStringToTimestamp("2015-03-18", new Timestamp(c.getTimeInMillis))
checkCastStringToTimestamp("2015-03-18 ", new Timestamp(c.getTimeInMillis))
- checkCastStringToTimestamp("2015-03-18T", new Timestamp(c.getTimeInMillis))
c = Calendar.getInstance(tz)
c.set(2015, 2, 18, 12, 3, 17)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 2b7b941..9e61cb97 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -147,6 +147,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
assert(toDate("1999 08 01").isEmpty)
assert(toDate("1999-08 01").isEmpty)
assert(toDate("1999 08").isEmpty)
+ assert(toDate("1999-08-").isEmpty)
assert(toDate("").isEmpty)
assert(toDate(" ").isEmpty)
}
@@ -182,7 +183,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
checkStringToTimestamp("1969-12-31 16:00:00", Option(date(1969, 12, 31, 16, zid = zid)))
checkStringToTimestamp("0001", Option(date(1, 1, 1, 0, zid = zid)))
checkStringToTimestamp("2015-03", Option(date(2015, 3, 1, zid = zid)))
- Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ", "2015-03-18T").foreach { s =>
+ Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ").foreach { s =>
checkStringToTimestamp(s, Option(date(2015, 3, 18, zid = zid)))
}
@@ -289,6 +290,11 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
checkStringToTimestamp("", None)
checkStringToTimestamp(" ", None)
checkStringToTimestamp("+", None)
+ checkStringToTimestamp("T", None)
+ checkStringToTimestamp("2015-03-18T", None)
+ checkStringToTimestamp("12::", None)
+ checkStringToTimestamp("2015-03-18T12:03:17-8:", None)
+ checkStringToTimestamp("2015-03-18T12:03:17-8:30:", None)
// Truncating the fractional seconds
expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = UTC))
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org