Posted to commits@spark.apache.org by we...@apache.org on 2020/06/03 06:54:20 UTC

[spark] branch branch-3.0 updated: [SPARK-31892][SQL] Disable week-based date field for parsing

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 5a271c9  [SPARK-31892][SQL] Disable week-based date field for parsing
5a271c9 is described below

commit 5a271c9df67419beae791aa1c28e695fc77e591d
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Wed Jun 3 06:49:58 2020 +0000

    [SPARK-31892][SQL] Disable week-based date field for parsing
    
    This PR disables the week-based date fields for parsing.
    
    closes #28674
    
    1. It is an unfixable behavior change: there is a gap between SimpleDateFormat and DateTimeFormatter for these fields, plus backward-compatibility differences across JDKs. A lot of effort has been made to prove this at https://github.com/apache/spark/pull/28674
    
    2. The existing behavior itself in 2.4 is confusing, e.g.
    
    ```sql
    spark-sql> select to_timestamp('1', 'w');
    1969-12-28 00:00:00
    spark-sql> select to_timestamp('1', 'u');
    1970-01-05 00:00:00
    ```
      The 'u' here resolves neither to the Monday of the first week (week-based form) nor to the first day of the year (non-week-based form), but to the Monday of the second week in week-based form.
    
    And, e.g.
    ```sql
    spark-sql> select to_timestamp('2020 2020', 'YYYY yyyy');
    2020-01-01 00:00:00
    spark-sql> select to_timestamp('2020 2020', 'yyyy YYYY');
    2019-12-29 00:00:00
    spark-sql> select to_timestamp('2020 2020 1', 'YYYY yyyy w');
    NULL
    spark-sql> select to_timestamp('2020 2020 1', 'yyyy YYYY w');
    2019-12-29 00:00:00
    ```
    
      I think we don't need to carry all of this confusing behavior over from Java.
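    
    For reference, a minimal spark-shell sketch of the intended behavior after this change (assuming a running `SparkSession` named `spark`; the exact error text may differ): formatting with these letters still works, while parsing now fails fast.
    
    ```scala
    import org.apache.spark.sql.functions.{date_format, lit, to_timestamp}
    
    // Formatting with week-based letters is still allowed, e.g. via date_format.
    spark.range(1)
      .select(date_format(lit("2020-06-03").cast("timestamp"), "YYYY 'week' ww"))
      .show()
    
    // Parsing with a week-based letter is now rejected up front; per the new tests
    // in this patch it surfaces as a SparkUpgradeException that points to
    // spark.sql.legacy.timeParserPolicy instead of returning a confusing value.
    try {
      spark.range(1).select(to_timestamp(lit("1"), "w")).collect()
    } catch {
      case e: Exception => println(e.getMessage)
    }
    ```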
    
    3. The current test coverage for week-based date fields is almost 0%, which indicates that we have never really expected these fields to be used.
    
    4. Avoiding JDK bugs
    
    https://issues.apache.org/jira/browse/SPARK-31880
    
    Yes, the 'Y/W/w/u/F/E' pattern letters can no longer be used in datetime parsing functions.
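    
    If the old behavior is explicitly needed, a hedged sketch of the escape hatch (the formatter factories in this patch fall back to the legacy parsers before this validation when the legacy time parser policy is set):
    
    ```scala
    // Opt back into the 2.4 parsing path (SimpleDateFormat/FastDateFormat),
    // which still accepts week-based letters, by setting the legacy policy.
    spark.conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY")
    spark.sql("SELECT to_timestamp('2020 1', 'yyyy w')").show()
    ```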
    
    more tests added
    
    Closes #28706 from yaooqinn/SPARK-31892.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit afe95bd9ad7a07c49deecf05f0a1000bb8f80caa)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 docs/sql-ref-datetime-pattern.md                   |  2 ++
 .../spark/sql/catalyst/catalog/interface.scala     |  2 +-
 .../spark/sql/catalyst/csv/CSVInferSchema.scala    |  2 +-
 .../sql/catalyst/csv/UnivocityGenerator.scala      |  5 ++--
 .../spark/sql/catalyst/csv/UnivocityParser.scala   |  5 ++--
 .../catalyst/expressions/datetimeExpressions.scala | 12 ++++-----
 .../spark/sql/catalyst/json/JacksonGenerator.scala |  5 ++--
 .../spark/sql/catalyst/json/JacksonParser.scala    |  5 ++--
 .../spark/sql/catalyst/json/JsonInferSchema.scala  |  2 +-
 .../spark/sql/catalyst/util/DateFormatter.scala    | 13 ++++++----
 .../catalyst/util/DateTimeFormatterHelper.scala    | 17 +++++++++----
 .../sql/catalyst/util/TimestampFormatter.scala     | 16 ++++++------
 .../expressions/DateExpressionsSuite.scala         | 29 ++++++++++++++++++++++
 .../util/DateTimeFormatterHelperSuite.scala        |  7 ++++++
 .../apache/spark/sql/util/DateFormatterSuite.scala |  9 ++++---
 .../spark/sql/util/TimestampFormatterSuite.scala   | 12 ++++-----
 .../execution/datasources/PartitioningUtils.scala  |  2 +-
 .../parquet/ParquetPartitionDiscoverySuite.scala   |  2 +-
 18 files changed, 101 insertions(+), 46 deletions(-)

diff --git a/docs/sql-ref-datetime-pattern.md b/docs/sql-ref-datetime-pattern.md
index 865b947..5859ad8 100644
--- a/docs/sql-ref-datetime-pattern.md
+++ b/docs/sql-ref-datetime-pattern.md
@@ -136,6 +136,8 @@ The count of pattern letters determines the format.
   During formatting, all valid data will be output even it is in the optional section.
   During parsing, the whole section may be missing from the parsed string.
   An optional section is started by `[` and ended using `]` (or at the end of the pattern).
+  
+  - Symbols of 'Y', 'W', 'w', 'E', 'u', 'F', 'q' and 'Q' can only be used for datetime formatting, e.g. `date_format`. They are not allowed to be used for datetime parsing, e.g. `to_timestamp`.
 
 More details for the text style:
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index d02776b..4e63ee7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -525,7 +525,7 @@ object CatalogColumnStat extends Logging {
     TimestampFormatter(
       format = "yyyy-MM-dd HH:mm:ss.SSSSSS",
       zoneId = ZoneOffset.UTC,
-      needVarLengthSecondFraction = isParsing)
+      isParsing = isParsing)
   }
 
   /**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
index a7c2435..f0df18d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
@@ -35,7 +35,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
     options.zoneId,
     options.locale,
     legacyFormat = FAST_DATE_FORMAT,
-    needVarLengthSecondFraction = true)
+    isParsing = true)
 
   private val decimalParser = if (options.locale == Locale.US) {
     // Special handling the default locale for backward compatibility
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala
index 4990da2..a3ee129 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala
@@ -47,12 +47,13 @@ class UnivocityGenerator(
     options.zoneId,
     options.locale,
     legacyFormat = FAST_DATE_FORMAT,
-    needVarLengthSecondFraction = false)
+    isParsing = false)
   private val dateFormatter = DateFormatter(
     options.dateFormat,
     options.zoneId,
     options.locale,
-    legacyFormat = FAST_DATE_FORMAT)
+    legacyFormat = FAST_DATE_FORMAT,
+    isParsing = false)
 
   private def makeConverter(dataType: DataType): ValueConverter = dataType match {
     case DateType =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index f2bb7db..3898eca 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -90,12 +90,13 @@ class UnivocityParser(
     options.zoneId,
     options.locale,
     legacyFormat = FAST_DATE_FORMAT,
-    needVarLengthSecondFraction = true)
+    isParsing = true)
   private lazy val dateFormatter = DateFormatter(
     options.dateFormat,
     options.zoneId,
     options.locale,
-    legacyFormat = FAST_DATE_FORMAT)
+    legacyFormat = FAST_DATE_FORMAT,
+    isParsing = true)
 
   private val csvFilters = new CSVFilters(filters, requiredSchema)
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index a0cf446..62f38b3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -643,7 +643,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
           format.toString,
           zoneId,
           legacyFormat = SIMPLE_DATE_FORMAT,
-          needVarLengthSecondFraction = false)
+          isParsing = false)
       }
     } else None
   }
@@ -654,7 +654,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
         format.toString,
         zoneId,
         legacyFormat = SIMPLE_DATE_FORMAT,
-        needVarLengthSecondFraction = false)
+        isParsing = false)
     } else {
       formatter.get
     }
@@ -799,7 +799,7 @@ abstract class ToTimestamp
         constFormat.toString,
         zoneId,
         legacyFormat = SIMPLE_DATE_FORMAT,
-        needVarLengthSecondFraction = true)
+        isParsing = true)
     } catch {
       case e: SparkUpgradeException => throw e
       case NonFatal(_) => null
@@ -838,7 +838,7 @@ abstract class ToTimestamp
                 formatString,
                 zoneId,
                 legacyFormat = SIMPLE_DATE_FORMAT,
-                needVarLengthSecondFraction = true)
+                isParsing = true)
                 .parse(t.asInstanceOf[UTF8String].toString) / downScaleFactor
             } catch {
               case e: SparkUpgradeException => throw e
@@ -981,7 +981,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
         constFormat.toString,
         zoneId,
         legacyFormat = SIMPLE_DATE_FORMAT,
-        needVarLengthSecondFraction = false)
+        isParsing = false)
     } catch {
       case e: SparkUpgradeException => throw e
       case NonFatal(_) => null
@@ -1014,7 +1014,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
                 f.toString,
                 zoneId,
                 legacyFormat = SIMPLE_DATE_FORMAT,
-                needVarLengthSecondFraction = false)
+                isParsing = false)
                 .format(time.asInstanceOf[Long] * MICROS_PER_SECOND))
           } catch {
             case e: SparkUpgradeException => throw e
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
index f3938fe..fb0ca32 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
@@ -83,12 +83,13 @@ private[sql] class JacksonGenerator(
     options.zoneId,
     options.locale,
     legacyFormat = FAST_DATE_FORMAT,
-    needVarLengthSecondFraction = false)
+    isParsing = false)
   private val dateFormatter = DateFormatter(
     options.dateFormat,
     options.zoneId,
     options.locale,
-    legacyFormat = FAST_DATE_FORMAT)
+    legacyFormat = FAST_DATE_FORMAT,
+    isParsing = false)
 
   private def makeWriter(dataType: DataType): ValueWriter = dataType match {
     case NullType =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index c4f6121..e038f77 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -61,12 +61,13 @@ class JacksonParser(
     options.zoneId,
     options.locale,
     legacyFormat = FAST_DATE_FORMAT,
-    needVarLengthSecondFraction = true)
+    isParsing = true)
   private lazy val dateFormatter = DateFormatter(
     options.dateFormat,
     options.zoneId,
     options.locale,
-    legacyFormat = FAST_DATE_FORMAT)
+    legacyFormat = FAST_DATE_FORMAT,
+    isParsing = true)
 
   /**
    * Create a converter which converts the JSON documents held by the `JsonParser`
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
index 56b1278..de396a4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
@@ -43,7 +43,7 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable {
     options.zoneId,
     options.locale,
     legacyFormat = FAST_DATE_FORMAT,
-    needVarLengthSecondFraction = true)
+    isParsing = true)
 
   /**
    * Infer the type of a collection of json records in three stages:
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index 5711ea0..e178164 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -41,7 +41,8 @@ class Iso8601DateFormatter(
     pattern: String,
     zoneId: ZoneId,
     locale: Locale,
-    legacyFormat: LegacyDateFormats.LegacyDateFormat)
+    legacyFormat: LegacyDateFormats.LegacyDateFormat,
+    isParsing: Boolean)
   extends DateFormatter with DateTimeFormatterHelper {
 
   @transient
@@ -131,12 +132,13 @@ object DateFormatter {
       format: Option[String],
       zoneId: ZoneId,
       locale: Locale = defaultLocale,
-      legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT): DateFormatter = {
+      legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
+      isParsing: Boolean = true): DateFormatter = {
     val pattern = format.getOrElse(defaultPattern)
     if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
       getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
     } else {
-      val df = new Iso8601DateFormatter(pattern, zoneId, locale, legacyFormat)
+      val df = new Iso8601DateFormatter(pattern, zoneId, locale, legacyFormat, isParsing)
       df.validatePatternString()
       df
     }
@@ -159,8 +161,9 @@ object DateFormatter {
       format: String,
       zoneId: ZoneId,
       locale: Locale,
-      legacyFormat: LegacyDateFormat): DateFormatter = {
-    getFormatter(Some(format), zoneId, locale, legacyFormat)
+      legacyFormat: LegacyDateFormat,
+      isParsing: Boolean): DateFormatter = {
+    getFormatter(Some(format), zoneId, locale, legacyFormat, isParsing)
   }
 
   def apply(format: String, zoneId: ZoneId): DateFormatter = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index 5b9d839..ffa7cd4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -89,9 +89,9 @@ trait DateTimeFormatterHelper {
   protected def getOrCreateFormatter(
       pattern: String,
       locale: Locale,
-      needVarLengthSecondFraction: Boolean = false): DateTimeFormatter = {
-    val newPattern = convertIncompatiblePattern(pattern)
-    val useVarLen = needVarLengthSecondFraction && newPattern.contains('S')
+      isParsing: Boolean = false): DateTimeFormatter = {
+    val newPattern = convertIncompatiblePattern(pattern, isParsing)
+    val useVarLen = isParsing && newPattern.contains('S')
     val key = (newPattern, locale, useVarLen)
     var formatter = cache.getIfPresent(key)
     if (formatter == null) {
@@ -227,6 +227,12 @@ private object DateTimeFormatterHelper {
     formatter.format(LocalDate.of(2000, 1, 1)) == "1 1"
   }
   final val unsupportedLetters = Set('A', 'c', 'e', 'n', 'N', 'p')
+  // SPARK-31892: The week-based date fields are rarely used and really confusing for parsing values
+  // to datetime, especially when they are mixed with other non-week-based ones.
+  // The quarter fields are also parsed strangely, e.g. when a pattern containing `yMd` can be
+  // directly resolved, the `q` does check whether the month is valid, but if the date
+  // fields are incomplete, e.g. `yM`, the check is bypassed.
+  final val unsupportedLettersForParsing = Set('Y', 'W', 'w', 'E', 'u', 'F', 'q', 'Q')
   final val unsupportedPatternLengths = {
     // SPARK-31771: Disable Narrow-form TextStyle to avoid silent data change, as it is Full-form in
     // 2.4
@@ -246,7 +252,7 @@ private object DateTimeFormatterHelper {
    * @param pattern The input pattern.
    * @return The pattern for new parser
    */
-  def convertIncompatiblePattern(pattern: String): String = {
+  def convertIncompatiblePattern(pattern: String, isParsing: Boolean = false): String = {
     val eraDesignatorContained = pattern.split("'").zipWithIndex.exists {
       case (patternPart, index) =>
         // Text can be quoted using single quotes, we only check the non-quote parts.
@@ -255,7 +261,8 @@ private object DateTimeFormatterHelper {
     (pattern + " ").split("'").zipWithIndex.map {
       case (patternPart, index) =>
         if (index % 2 == 0) {
-          for (c <- patternPart if unsupportedLetters.contains(c)) {
+          for (c <- patternPart if unsupportedLetters.contains(c) ||
+            (isParsing && unsupportedLettersForParsing.contains(c))) {
             throw new IllegalArgumentException(s"Illegal pattern character: $c")
           }
           for (style <- unsupportedPatternLengths if patternPart.contains(style)) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index 252e703..0873b85 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -272,13 +272,13 @@ object TimestampFormatter {
       zoneId: ZoneId,
       locale: Locale = defaultLocale,
       legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
-      needVarLengthSecondFraction: Boolean = false): TimestampFormatter = {
+      isParsing: Boolean = false): TimestampFormatter = {
     val pattern = format.getOrElse(defaultPattern)
     if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
       getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
     } else {
       val tf = new Iso8601TimestampFormatter(
-        pattern, zoneId, locale, legacyFormat, needVarLengthSecondFraction)
+        pattern, zoneId, locale, legacyFormat, isParsing)
       tf.validatePatternString()
       tf
     }
@@ -304,23 +304,23 @@ object TimestampFormatter {
       zoneId: ZoneId,
       locale: Locale,
       legacyFormat: LegacyDateFormat,
-      needVarLengthSecondFraction: Boolean): TimestampFormatter = {
-    getFormatter(Some(format), zoneId, locale, legacyFormat, needVarLengthSecondFraction)
+      isParsing: Boolean): TimestampFormatter = {
+    getFormatter(Some(format), zoneId, locale, legacyFormat, isParsing)
   }
 
   def apply(
       format: String,
       zoneId: ZoneId,
       legacyFormat: LegacyDateFormat,
-      needVarLengthSecondFraction: Boolean): TimestampFormatter = {
-    getFormatter(Some(format), zoneId, defaultLocale, legacyFormat, needVarLengthSecondFraction)
+      isParsing: Boolean): TimestampFormatter = {
+    getFormatter(Some(format), zoneId, defaultLocale, legacyFormat, isParsing)
   }
 
   def apply(
       format: String,
       zoneId: ZoneId,
-      needVarLengthSecondFraction: Boolean = false): TimestampFormatter = {
-    getFormatter(Some(format), zoneId, needVarLengthSecondFraction = needVarLengthSecondFraction)
+      isParsing: Boolean = false): TimestampFormatter = {
+    getFormatter(Some(format), zoneId, isParsing = isParsing)
   }
 
   def apply(zoneId: ZoneId): TimestampFormatter = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index e00d65f..f3b9d6e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -1147,4 +1147,33 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
           Literal("yyyy-MM-dd'T'HH:mm:ss.SSSz")), "Fail to parse")
     }
   }
+
+  test("Disable week-based date fields and quarter fields for parsing") {
+
+    def checkSparkUpgrade(c: Char): Unit = {
+      checkExceptionInExpression[SparkUpgradeException](
+        new ParseToTimestamp(Literal("1"), Literal(c.toString)).child, "3.0")
+      checkExceptionInExpression[SparkUpgradeException](
+        new ParseToDate(Literal("1"), Literal(c.toString)).child, "3.0")
+      checkExceptionInExpression[SparkUpgradeException](
+        ToUnixTimestamp(Literal("1"), Literal(c.toString)), "3.0")
+      checkExceptionInExpression[SparkUpgradeException](
+        UnixTimestamp(Literal("1"), Literal(c.toString)), "3.0")
+    }
+
+    def checkNullify(c: Char): Unit = {
+      checkEvaluation(new ParseToTimestamp(Literal("1"), Literal(c.toString)).child, null)
+      checkEvaluation(new ParseToDate(Literal("1"), Literal(c.toString)).child, null)
+      checkEvaluation(ToUnixTimestamp(Literal("1"), Literal(c.toString)), null)
+      checkEvaluation(UnixTimestamp(Literal("1"), Literal(c.toString)), null)
+    }
+
+    Seq('Y', 'W', 'w', 'E', 'u', 'F').foreach { l =>
+      checkSparkUpgrade(l)
+    }
+
+    Seq('q', 'Q').foreach { l =>
+      checkNullify(l)
+    }
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala
index f0cc4d1..c68bdac 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelperSuite.scala
@@ -40,6 +40,13 @@ class DateTimeFormatterHelperSuite extends SparkFunSuite {
       val e = intercept[IllegalArgumentException](convertIncompatiblePattern(s"yyyy-MM-dd $l G"))
       assert(e.getMessage === s"Illegal pattern character: $l")
     }
+    unsupportedLettersForParsing.foreach { l =>
+      val e = intercept[IllegalArgumentException] {
+        convertIncompatiblePattern(s"$l", isParsing = true)
+      }
+      assert(e.getMessage === s"Illegal pattern character: $l")
+      assert(convertIncompatiblePattern(s"$l").nonEmpty)
+    }
     unsupportedPatternLengths.foreach { style =>
       val e1 = intercept[IllegalArgumentException] {
         convertIncompatiblePattern(s"yyyy-MM-dd $style")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
index 7d503cc..22a1396 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
@@ -72,7 +72,8 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
                   DateFormatter.defaultPattern,
                   getZoneId(timeZone),
                   DateFormatter.defaultLocale,
-                  legacyFormat)
+                  legacyFormat,
+                  isParsing = false)
                 val days = formatter.parse(date)
                 assert(date === formatter.format(days))
                 assert(date === formatter.format(daysToLocalDate(days)))
@@ -106,7 +107,8 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
                   DateFormatter.defaultPattern,
                   getZoneId(timeZone),
                   DateFormatter.defaultLocale,
-                  legacyFormat)
+                  legacyFormat,
+                  isParsing = false)
                 val date = formatter.format(days)
                 val parsed = formatter.parse(date)
                 assert(days === parsed)
@@ -173,7 +175,8 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
               DateFormatter.defaultPattern,
               getZoneId(timeZone),
               DateFormatter.defaultLocale,
-              legacyFormat)
+              legacyFormat,
+              isParsing = false)
             assert(LocalDate.ofEpochDay(formatter.parse("1000-01-01")) === LocalDate.of(1000, 1, 1))
             assert(formatter.format(LocalDate.of(1000, 1, 1)) === "1000-01-01")
             assert(formatter.format(localDateToDays(LocalDate.of(1000, 1, 1))) === "1000-01-01")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
index a72dfb9..9870d5e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
@@ -48,7 +48,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
       val formatter = TimestampFormatter(
         "yyyy-MM-dd'T'HH:mm:ss.SSSSSS",
         getZoneId(zoneId),
-        needVarLengthSecondFraction = true)
+        isParsing = true)
       val microsSinceEpoch = formatter.parse(localDate)
       assert(microsSinceEpoch === expectedMicros(zoneId))
     }
@@ -73,7 +73,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
           // Test only FAST_DATE_FORMAT because other legacy formats don't support formatting
           // in microsecond precision.
           LegacyDateFormats.FAST_DATE_FORMAT,
-          needVarLengthSecondFraction = false),
+          isParsing = false),
         TimestampFormatter.getFractionFormatter(getZoneId(zoneId))).foreach { formatter =>
         val timestamp = formatter.format(microsSinceEpoch)
         assert(timestamp === expectedTimestamp(zoneId))
@@ -98,7 +98,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
         outstandingZoneIds.foreach { zoneId =>
           val timestamp = TimestampFormatter(pattern, zoneId).format(micros)
           val parsed = TimestampFormatter(
-            pattern, zoneId, needVarLengthSecondFraction = true).parse(timestamp)
+            pattern, zoneId, isParsing = true).parse(timestamp)
           assert(micros === parsed)
         }
       }
@@ -119,7 +119,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
       outstandingZoneIds.foreach { zoneId =>
         val pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"
         val micros = TimestampFormatter(
-          pattern, zoneId, needVarLengthSecondFraction = true).parse(timestamp)
+          pattern, zoneId, isParsing = true).parse(timestamp)
         val formatted = TimestampFormatter(pattern, zoneId).format(micros)
         assert(timestamp === formatted)
       }
@@ -182,7 +182,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
   test("parsing timestamp strings with various seconds fractions") {
     outstandingZoneIds.foreach { zoneId =>
       def check(pattern: String, input: String, reference: String): Unit = {
-        val formatter = TimestampFormatter(pattern, zoneId, needVarLengthSecondFraction = true)
+        val formatter = TimestampFormatter(pattern, zoneId, isParsing = true)
         val expected = stringToTimestamp(UTF8String.fromString(reference), zoneId).get
         val actual = formatter.parse(input)
         assert(actual === expected)
@@ -289,7 +289,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
                   zoneId,
                   TimestampFormatter.defaultLocale,
                   legacyFormat,
-                  needVarLengthSecondFraction = false)
+                  isParsing = false)
               }.toSeq :+ TimestampFormatter.getFractionFormatter(zoneId)
               formatters.foreach { formatter =>
                 assert(microsToInstant(formatter.parse("1000-01-01 01:02:03"))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 292ac6d..f7e225b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -134,7 +134,7 @@ object PartitioningUtils {
     val timestampFormatter = TimestampFormatter(
       timestampPartitionPattern,
       zoneId,
-      needVarLengthSecondFraction = true)
+      isParsing = true)
     // First, we need to parse every partition's path and see if we can find partition values.
     val (partitionValues, optDiscoveredBasePaths) = paths.map { path =>
       parsePartition(path, typeInference, basePaths, userSpecifiedDataTypes,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
index d3301ce..32a9558 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
@@ -60,7 +60,7 @@ abstract class ParquetPartitionDiscoverySuite
   val timeZoneId = ZoneId.systemDefault()
   val df = DateFormatter(timeZoneId)
   val tf = TimestampFormatter(
-    timestampPartitionPattern, timeZoneId, needVarLengthSecondFraction = true)
+    timestampPartitionPattern, timeZoneId, isParsing = true)
 
   protected override def beforeAll(): Unit = {
     super.beforeAll()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org