You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/07/29 01:18:16 UTC

[spark] branch branch-3.2 updated: [SPARK-36286][SQL] Block some invalid datetime string

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new fa521c1  [SPARK-36286][SQL] Block some invalid datetime string
fa521c1 is described below

commit fa521c15069a9731661df670e227a8c53c36be33
Author: Linhong Liu <li...@databricks.com>
AuthorDate: Thu Jul 29 09:16:46 2021 +0800

    [SPARK-36286][SQL] Block some invalid datetime string
    
    ### What changes were proposed in this pull request?
    In PR #32959, we found some weird datetime strings that can be parsed. ([details](https://github.com/apache/spark/pull/32959#discussion_r665015489))
    This PR blocks the invalid datetime string.
    
    ### Why are the changes needed?
    bug fix
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, below strings will have different results when cast to datetime.
    ```sql
    select cast('12::' as timestamp); -- Before: 2021-07-07 12:00:00, After: NULL
    select cast('T' as timestamp); -- Before: 2021-07-07 00:00:00, After: NULL
    ```
    
    ### How was this patch tested?
    some new test cases
    
    Closes #33490 from linhongliu-db/SPARK-35780-block-invalid-format.
    
    Authored-by: Linhong Liu <li...@databricks.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit ed0e351f05ac6edc132c3a630206b2031c419e1c)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala  | 7 +++++--
 .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala     | 4 ++++
 .../org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala | 1 -
 .../org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala   | 8 +++++++-
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 0825a11..36d2b9b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -254,7 +254,9 @@ object DateTimeUtils {
       val maxDigitsYear = 6
       // For the nanosecond part, more than 6 digits is allowed, but will be truncated.
       segment == 6 || (segment == 0 && digits >= 4 && digits <= maxDigitsYear) ||
-        (segment != 0 && segment != 6 && digits <= 2)
+        // For the zoneId segment (7), it could be zero digits when it's a region-based zone ID
+        (segment == 7 && digits <= 2) ||
+        (segment != 0 && segment != 6 && segment != 7 && digits > 0 && digits <= 2)
     }
     if (s == null || s.trimAll().numBytes() == 0) {
       return (Array.empty, None, false)
@@ -527,7 +529,8 @@ object DateTimeUtils {
     def isValidDigits(segment: Int, digits: Int): Boolean = {
       // An integer is able to represent a date within [+-]5 million years.
       var maxDigitsYear = 7
-      (segment == 0 && digits >= 4 && digits <= maxDigitsYear) || (segment != 0 && digits <= 2)
+      (segment == 0 && digits >= 4 && digits <= maxDigitsYear) ||
+        (segment != 0 && digits > 0 && digits <= 2)
     }
     if (s == null || s.trimAll().numBytes() == 0) {
       return None
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 26270e6..4e247f5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -576,4 +576,8 @@ class CastSuite extends CastSuiteBase {
       checkEvaluation(cast(invalidInput, TimestampNTZType), null)
     }
   }
+
+  test("SPARK-36286: invalid string cast to timestamp") {
+    checkEvaluation(cast(Literal("2015-03-18T"), TimestampType), null)
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index dcdc6f9..f01fea8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -150,7 +150,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
       c.set(Calendar.MILLISECOND, 0)
       checkCastStringToTimestamp("2015-03-18", new Timestamp(c.getTimeInMillis))
       checkCastStringToTimestamp("2015-03-18 ", new Timestamp(c.getTimeInMillis))
-      checkCastStringToTimestamp("2015-03-18T", new Timestamp(c.getTimeInMillis))
 
       c = Calendar.getInstance(tz)
       c.set(2015, 2, 18, 12, 3, 17)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 2b7b941..9e61cb97 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -147,6 +147,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
     assert(toDate("1999 08 01").isEmpty)
     assert(toDate("1999-08 01").isEmpty)
     assert(toDate("1999 08").isEmpty)
+    assert(toDate("1999-08-").isEmpty)
     assert(toDate("").isEmpty)
     assert(toDate("   ").isEmpty)
   }
@@ -182,7 +183,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
       checkStringToTimestamp("1969-12-31 16:00:00", Option(date(1969, 12, 31, 16, zid = zid)))
       checkStringToTimestamp("0001", Option(date(1, 1, 1, 0, zid = zid)))
       checkStringToTimestamp("2015-03", Option(date(2015, 3, 1, zid = zid)))
-      Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ", "2015-03-18T").foreach { s =>
+      Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ").foreach { s =>
         checkStringToTimestamp(s, Option(date(2015, 3, 18, zid = zid)))
       }
 
@@ -289,6 +290,11 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
       checkStringToTimestamp("", None)
       checkStringToTimestamp("    ", None)
       checkStringToTimestamp("+", None)
+      checkStringToTimestamp("T", None)
+      checkStringToTimestamp("2015-03-18T", None)
+      checkStringToTimestamp("12::", None)
+      checkStringToTimestamp("2015-03-18T12:03:17-8:", None)
+      checkStringToTimestamp("2015-03-18T12:03:17-8:30:", None)
 
       // Truncating the fractional seconds
       expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = UTC))

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org