You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/05 16:51:43 UTC
[spark] branch branch-3.0 updated: [SPARK-31867][SQL][FOLLOWUP]
Check result differences for datetime formatting
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 1fc09e3 [SPARK-31867][SQL][FOLLOWUP] Check result differences for datetime formatting
1fc09e3 is described below
commit 1fc09e3cfbd08c385a8c5cffa3e85a089852f8bc
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Fri Jun 5 16:44:16 2020 +0000
[SPARK-31867][SQL][FOLLOWUP] Check result differences for datetime formatting
### What changes were proposed in this pull request?
In this PR, we throw `SparkUpgradeException` when getting `DateTimeException` for datetime formatting in the `EXCEPTION` legacy Time Parser Policy.
### Why are the changes needed?
`DateTimeException` is also declared by `java.time.format.DateTimeFormatter#format`, but in Spark, it can barely occur. We have suspected one that due to a JDK bug so far. see https://bugs.openjdk.java.net/browse/JDK-8079628.
For `from_unixtime` function, we will suppress the DateTimeException caused by `DD` and result `NULL`. It is a silent date change that should be avoided in Java 8.
### Does this PR introduce _any_ user-facing change?
Yes, when running on Java8 and using `from_unixtime` function with pattern `DD` to format datetimes, if dayofyear>=100, `SparkUpgradeException` will alert users instead of silently resulting null. For `date_format`, `SparkUpgradeException` take the palace of `DateTimeException`.
### How was this patch tested?
add unit tests.
Closes #28736 from yaooqinn/SPARK-31867-F.
Authored-by: Kent Yao <ya...@hotmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit fc6af9d900ec6f6a1cbe8f987857a69e6ef600d1)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/catalyst/util/DateFormatter.scala | 7 +++--
.../catalyst/util/DateTimeFormatterHelper.scala | 30 +++++++++++++++++++---
.../sql/catalyst/util/TimestampFormatter.scala | 7 +++--
.../catalyst/util/TimestampFormatterSuite.scala | 15 +++++++++++
4 files changed, 51 insertions(+), 8 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index fbc9f56..ec8db46 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -58,12 +58,15 @@ class Iso8601DateFormatter(
try {
val localDate = toLocalDate(formatter.parse(s))
localDateToDays(localDate)
- } catch checkDiffResult(s, legacyFormatter.parse)
+ } catch checkParsedDiff(s, legacyFormatter.parse)
}
}
override def format(localDate: LocalDate): String = {
- localDate.format(formatter)
+ try {
+ localDate.format(formatter)
+ } catch checkFormattedDiff(toJavaDate(localDateToDays(localDate)),
+ (d: Date) => format(d))
}
override def format(days: Int): String = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index 8e5c865..992a2b1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -21,7 +21,7 @@ import java.time._
import java.time.chrono.IsoChronology
import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle}
import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries}
-import java.util.Locale
+import java.util.{Date, Locale}
import com.google.common.cache.CacheBuilder
@@ -109,13 +109,17 @@ trait DateTimeFormatterHelper {
formatter
}
+ private def needConvertToSparkUpgradeException(e: Throwable): Boolean = e match {
+ case _: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION => true
+ case _ => false
+ }
// When legacy time parser policy set to EXCEPTION, check whether we will get different results
// between legacy parser and new parser. If new parser fails but legacy parser works, throw a
// SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED,
// DateTimeParseException will address by the caller side.
- protected def checkDiffResult[T](
+ protected def checkParsedDiff[T](
s: String, legacyParseFunc: String => T): PartialFunction[Throwable, T] = {
- case e: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION =>
+ case e if needConvertToSparkUpgradeException(e) =>
try {
legacyParseFunc(s)
} catch {
@@ -126,6 +130,25 @@ trait DateTimeFormatterHelper {
s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e)
}
+ // When legacy time parser policy set to EXCEPTION, check whether we will get different results
+ // between legacy formatter and new formatter. If new formatter fails but legacy formatter works,
+ // throw a SparkUpgradeException. On the contrary, if the legacy policy set to CORRECTED,
+ // DateTimeParseException will address by the caller side.
+ protected def checkFormattedDiff[T <: Date](
+ d: T,
+ legacyFormatFunc: T => String): PartialFunction[Throwable, String] = {
+ case e if needConvertToSparkUpgradeException(e) =>
+ val resultCandidate = try {
+ legacyFormatFunc(d)
+ } catch {
+ case _: Throwable => throw e
+ }
+ throw new SparkUpgradeException("3.0", s"Fail to format it to '$resultCandidate' in the new" +
+ s" formatter. You can set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore" +
+ " the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid" +
+ " datetime string.", e)
+ }
+
/**
* When the new DateTimeFormatter failed to initialize because of invalid datetime pattern, it
* will throw IllegalArgumentException. If the pattern can be recognized by the legacy formatter
@@ -137,7 +160,6 @@ trait DateTimeFormatterHelper {
* @param tryLegacyFormatter a func to capture exception, identically which forces a legacy
* datetime formatter to be initialized
*/
-
protected def checkLegacyFormatter(
pattern: String,
tryLegacyFormatter: => Unit): PartialFunction[Throwable, DateTimeFormatter] = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index 6bcbb09..87d8a3e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -84,12 +84,15 @@ class Iso8601TimestampFormatter(
val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond)
- } catch checkDiffResult(s, legacyFormatter.parse)
+ } catch checkParsedDiff(s, legacyFormatter.parse)
}
}
override def format(instant: Instant): String = {
- formatter.withZone(zoneId).format(instant)
+ try {
+ formatter.withZone(zoneId).format(instant)
+ } catch checkFormattedDiff(toJavaTimestamp(instantToMicros(instant)),
+ (t: Timestamp) => format(t))
}
override def format(us: Long): String = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
index 311097f..88f6b0d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala
@@ -415,4 +415,19 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite {
val t5 = f3.parse("AM")
assert(t5 === date(1970))
}
+
+ test("check result differences for datetime formatting") {
+ val formatter = TimestampFormatter("DD", UTC, isParsing = false)
+ assert(formatter.format(date(1970, 1, 3)) == "03")
+ assert(formatter.format(date(1970, 4, 9)) == "99")
+
+ if (System.getProperty("java.version").split("\\D+")(0).toInt < 9) {
+ // https://bugs.openjdk.java.net/browse/JDK-8079628
+ intercept[SparkUpgradeException] {
+ formatter.format(date(1970, 4, 10))
+ }
+ } else {
+ assert(formatter.format(date(1970, 4, 10)) == "100")
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org