You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/03/09 10:56:35 UTC

[GitHub] [spark] cloud-fan commented on a change in pull request #27830: [SPARK-31030][SQL] Backward Compatibility for Parsing and formatting Datetime

cloud-fan commented on a change in pull request #27830: [SPARK-31030][SQL] Backward Compatibility for Parsing and formatting Datetime
URL: https://github.com/apache/spark/pull/27830#discussion_r389588564
 
 

 ##########
 File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 ##########
 @@ -906,4 +906,44 @@ object DateTimeUtils {
     val days = period.getDays
     new CalendarInterval(months, days, 0)
   }
+
+  /**
+   * Since the Proleptic Gregorian calendar is the de facto calendar worldwide, as well as the
+   * one chosen by the ANSI SQL standard, Spark 3.0 switches to it by using Java 8 API classes.
+   * However, the breaking changes in pattern strings between Java 7 and Java 8 also break
+   * backward compatibility with Spark 2.4 and earlier when parsing datetimes. This function
+   * converts all incompatible patterns for the new parser in Spark 3.0. See SPARK-31030 for
+   * more details.
+   *
+   * @param pattern The input pattern.
+   * @return The pattern for new parser
+   */
+  def convertIncompatiblePattern(pattern: String): String = {
+    val eraDesignatorContained = pattern.split("'").zipWithIndex.exists {
+      case (patternPart, index) =>
+        // Text can be quoted using single quotes, we only check the non-quote parts.
+        index % 2 == 0 && patternPart.contains("G")
+    }
+    pattern.split("'").zipWithIndex.map {
+      case (patternPart, index) =>
+        if (index % 2 == 0) {
+          // The meaning of 'u' was day number of week in Java 7, it changed to year in Java 8.
+          // Substitute 'u' to 'e' and use Java 8 parser to parse the string. If parsable, return
+          // the result; otherwise, fall back to 'u', and then use the legacy Java 7 parser to
+          // parse. When it is successfully parsed, throw an exception and ask users to change
+          // the pattern strings or turn on the legacy mode; otherwise, return NULL as what Spark
+          // 2.4 does.
+          val res = patternPart.replace("u", "e")
+          // In Java 8 API, 'u' supports negative years. We substitute 'y' to 'u' here for keeping
+          // the support in Spark 3.0. If parse failed in Spark 3.0, fall back to 'y'. We only do
+          // this substitution when there is no era designator found in the pattern.
+          if (!eraDesignatorContained) {
+            res.replace("y", "u")
+          } else {
+            res
+          }
+        } else {
+            patternPart
 
 Review comment:
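   nit: indentation — `patternPart` is indented four spaces inside the `else` branch; it should be two, matching the body of the `if` branch above.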

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org