You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/05 16:51:43 UTC

[spark] branch branch-3.0 updated: [SPARK-31867][SQL][FOLLOWUP] Check result differences for datetime formatting

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 1fc09e3  [SPARK-31867][SQL][FOLLOWUP] Check result differences for datetime formatting
1fc09e3 is described below

commit 1fc09e3cfbd08c385a8c5cffa3e85a089852f8bc
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Fri Jun 5 16:44:16 2020 +0000

    [SPARK-31867][SQL][FOLLOWUP] Check result differences for datetime formatting
    
    ### What changes were proposed in this pull request?
    
    In this PR, we throw `SparkUpgradeException` when getting `DateTimeException` for datetime formatting in the `EXCEPTION` legacy Time Parser Policy.
    
    ### Why are the changes needed?
    `DateTimeException` is also declared by `java.time.format.DateTimeFormatter#format`, but in Spark, it can barely occur. We have suspected one that due to a JDK bug so far. see https://bugs.openjdk.java.net/browse/JDK-8079628.
    
    For `from_unixtime` function, we will suppress the DateTimeException caused by `DD` and result `NULL`. It is a silent date change that should be avoided in Java 8.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes,  when running on Java8 and using `from_unixtime` function with pattern `DD` to format datetimes, if dayofyear>=100, `SparkUpgradeException` will alert users instead of silently resulting null. For `date_format`, `SparkUpgradeException` take the palace of  `DateTimeException`.
    
    ### How was this patch tested?
    
    add unit tests.
    
    Closes #28736 from yaooqinn/SPARK-31867-F.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit fc6af9d900ec6f6a1cbe8f987857a69e6ef600d1)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/catalyst/util/DateFormatter.scala    |  7 +++--
 .../catalyst/util/DateTimeFormatterHelper.scala    | 30 +++++++++++++++++++---
 .../sql/catalyst/util/TimestampFormatter.scala     |  7 +++--
 .../catalyst/util/TimestampFormatterSuite.scala    | 15 +++++++++++
 4 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index fbc9f56..ec8db46 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -58,12 +58,15 @@ class Iso8601DateFormatter(
       try {
         val localDate = toLocalDate(formatter.parse(s))
         localDateToDays(localDate)
-      } catch checkDiffResult(s, legacyFormatter.parse)
+      } catch checkParsedDiff(s, legacyFormatter.parse)
     }
   }
 
   override def format(localDate: LocalDate): String = {
-    localDate.format(formatter)
+    try {
+      localDate.format(formatter)
+    } catch checkFormattedDiff(toJavaDate(localDateToDays(localDate)),
+      (d: Date) => format(d))
   }
 
   override def format(days: Int): String = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index 8e5c865..992a2b1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -21,7 +21,7 @@ import java.time._
 import java.time.chrono.IsoChronology
 import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle}
 import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries}
-import java.util.Locale
+import java.util.{Date, Locale}
 
 import com.google.common.cache.CacheBuilder
 
@@ -109,13 +109,17 @@ trait DateTimeFormatterHelper {
     formatter
   }
 
+  private def needConvertToSparkUpgradeException(e: Throwable): Boolean = e match {
+    case _: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => true
+    case _ => false
+  }
   // When legacy time parser policy set to EXCEPTION, check whether we will get different results
   // between legacy parser and new parser. If new parser fails but legacy parser works, throw a
   // SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED,
   // DateTimeParseException will address by the caller side.
-  protected def checkDiffResult[T](
+  protected def checkParsedDiff[T](
       s: String, legacyParseFunc: String => T): PartialFunction[Throwable, T] = {
-    case e: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION =>
+    case e if needConvertToSparkUpgradeException(e) =>
       try {
         legacyParseFunc(s)
       } catch {
@@ -126,6 +130,25 @@ trait DateTimeFormatterHelper {
         s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e)
   }
 
+  // When legacy time parser policy set to EXCEPTION, check whether we will get different results
+  // between legacy formatter and new formatter. If new formatter fails but legacy formatter works,
+  // throw a SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED,
+  // DateTimeParseException will address by the caller side.
+  protected def checkFormattedDiff[T <: Date](
+      d: T,
+      legacyFormatFunc: T => String): PartialFunction[Throwable, String] = {
+    case e if needConvertToSparkUpgradeException(e) =>
+      val resultCandidate = try {
+        legacyFormatFunc(d)
+      } catch {
+        case _: Throwable => throw e
+      }
+      throw new SparkUpgradeException("3.0", s"Fail to format it to '$resultCandidate' in the new" +
+        s" formatter. You can set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore" +
+        " the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid" +
+        " datetime string.", e)
+  }
+
   /**
    * When the new DateTimeFormatter failed to initialize because of invalid datetime pattern, it
    * will throw IllegalArgumentException. If the pattern can be recognized by the legacy formatter
@@ -137,7 +160,6 @@ trait DateTimeFormatterHelper {
    * @param tryLegacyFormatter a func to capture exception, identically which forces a legacy
    *                           datetime formatter to be initialized
    */
-
   protected def checkLegacyFormatter(
       pattern: String,
       tryLegacyFormatter: => Unit): PartialFunction[Throwable, DateTimeFormatter] = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index 6bcbb09..87d8a3e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -84,12 +84,15 @@ class Iso8601TimestampFormatter(
         val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
 
         Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond)
-      } catch checkDiffResult(s, legacyFormatter.parse)
+      } catch checkParsedDiff(s, legacyFormatter.parse)
     }
   }
 
   override def format(instant: Instant): String = {
-    formatter.withZone(zoneId).format(instant)
+    try {
+      formatter.withZone(zoneId).format(instant)
+    } catch checkFormattedDiff(toJavaTimestamp(instantToMicros(instant)),
+      (t: Timestamp) => format(t))
   }
 
   override def format(us: Long): String = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
index 311097f..88f6b0d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
@@ -415,4 +415,19 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
     val t5 = f3.parse("AM")
     assert(t5 === date(1970))
   }
+
+  test("check result differences for datetime formatting") {
+    val formatter = TimestampFormatter("DD", UTC, isParsing = false)
+    assert(formatter.format(date(1970, 1, 3)) == "03")
+    assert(formatter.format(date(1970, 4, 9)) == "99")
+
+    if (System.getProperty("java.version").split("\\D+")(0).toInt < 9) {
+      // https://bugs.openjdk.java.net/browse/JDK-8079628
+      intercept[SparkUpgradeException] {
+        formatter.format(date(1970, 4, 10))
+      }
+    } else {
+      assert(formatter.format(date(1970, 4, 10)) == "100")
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org