You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2019/02/03 00:20:41 UTC
[spark] branch master updated: [SPARK-26805][SQL] Eliminate double
checking of stringToDate and stringToTimestamp inputs
This is an automated email from the ASF dual-hosted git repository.
srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 96c6c29 [SPARK-26805][SQL] Eliminate double checking of stringToDate and stringToTimestamp inputs
96c6c29 is described below
commit 96c6c295cc909c8904b0eb6c333a62f4897461c6
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Sat Feb 2 18:20:16 2019 -0600
[SPARK-26805][SQL] Eliminate double checking of stringToDate and stringToTimestamp inputs
## What changes were proposed in this pull request?
In the PR, I propose to eliminate checking of parsed segments inside of the `stringToDate` and `stringToTimestamp` because such checking is already performed while constructing *java.time* classes, in particular inside of `LocalDate` and `LocalTime`. As a consequence of removing the explicit checks, the `isInvalidDate` method is not needed any more, and it was removed from `DateTimeUtils`.
## How was this patch tested?
This was tested by `DateExpressionsSuite`, `DateFunctionsSuite`, `DateTimeUtilsSuite` and `CastSuite`.
Closes #23717 from MaxGekk/datetimeutils-refactoring.
Authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: Sean Owen <se...@databricks.com>
---
.../spark/sql/catalyst/util/DateTimeUtils.scala | 83 ++++++++--------------
1 file changed, 29 insertions(+), 54 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index a97c1f7..b537695 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -18,13 +18,15 @@
package org.apache.spark.sql.catalyst.util
import java.sql.{Date, Timestamp}
-import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, Month, ZonedDateTime}
+import java.time._
import java.time.Year.isLeap
import java.time.temporal.IsoFields
import java.util.{Locale, TimeZone}
import java.util.concurrent.{ConcurrentHashMap, TimeUnit}
import java.util.function.{Function => JFunction}
+import scala.util.control.NonFatal
+
import org.apache.spark.unsafe.types.UTF8String
/**
@@ -60,7 +62,6 @@ object DateTimeUtils {
final val toYearZero = to2001 + 7304850
final val TimeZoneGMT = TimeZone.getTimeZone("GMT")
final val TimeZoneUTC = TimeZone.getTimeZone("UTC")
- final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12)
val TIMEZONE_OPTION = "timeZone"
@@ -326,34 +327,27 @@ object DateTimeUtils {
segments(6) /= 10
digitsMilli -= 1
}
-
- if (!justTime && isInvalidDate(segments(0), segments(1), segments(2))) {
- return None
- }
-
- if (segments(3) < 0 || segments(3) > 23 || segments(4) < 0 || segments(4) > 59 ||
- segments(5) < 0 || segments(5) > 59 || segments(6) < 0 || segments(6) > 999999 ||
- segments(7) < 0 || segments(7) > 23 || segments(8) < 0 || segments(8) > 59) {
- return None
- }
-
- val zoneId = if (tz.isEmpty) {
- timeZone.toZoneId
- } else {
- getTimeZone(f"GMT${tz.get.toChar}${segments(7)}%02d:${segments(8)}%02d").toZoneId
- }
- val nanoseconds = TimeUnit.MICROSECONDS.toNanos(segments(6))
- val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
- val localDate = if (justTime) {
- LocalDate.now(zoneId)
- } else {
- LocalDate.of(segments(0), segments(1), segments(2))
+ try {
+ val zoneId = if (tz.isEmpty) {
+ timeZone.toZoneId
+ } else {
+ val sign = if (tz.get.toChar == '-') -1 else 1
+ ZoneId.ofOffset("GMT", ZoneOffset.ofHoursMinutes(sign * segments(7), sign * segments(8)))
+ }
+ val nanoseconds = TimeUnit.MICROSECONDS.toNanos(segments(6))
+ val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
+ val localDate = if (justTime) {
+ LocalDate.now(zoneId)
+ } else {
+ LocalDate.of(segments(0), segments(1), segments(2))
+ }
+ val localDateTime = LocalDateTime.of(localDate, localTime)
+ val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
+ val instant = Instant.from(zonedDateTime)
+ Some(instantToMicros(instant))
+ } catch {
+ case NonFatal(_) => None
}
- val localDateTime = LocalDateTime.of(localDate, localTime)
- val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
- val instant = Instant.from(zonedDateTime)
-
- Some(instantToMicros(instant))
}
def instantToMicros(instant: Instant): Long = {
@@ -414,32 +408,13 @@ object DateTimeUtils {
return None
}
segments(i) = currentSegmentValue
- if (isInvalidDate(segments(0), segments(1), segments(2))) {
- return None
- }
-
- val localDate = LocalDate.of(segments(0), segments(1), segments(2))
- val instant = localDate.atStartOfDay(TimeZoneUTC.toZoneId).toInstant
- Some(instantToDays(instant))
- }
-
- /**
- * Return true if the date is invalid.
- */
- private def isInvalidDate(year: Int, month: Int, day: Int): Boolean = {
- if (year < 0 || year > 9999 || month < 1 || month > 12 || day < 1 || day > 31) {
- return true
- }
- if (month == 2) {
- if (isLeap(year) && day > 29) {
- return true
- } else if (!isLeap(year) && day > 28) {
- return true
- }
- } else if (!MonthOf31Days.contains(month) && day > 30) {
- return true
+ try {
+ val localDate = LocalDate.of(segments(0), segments(1), segments(2))
+ val instant = localDate.atStartOfDay(TimeZoneUTC.toZoneId).toInstant
+ Some(instantToDays(instant))
+ } catch {
+ case NonFatal(_) => None
}
- false
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org