Posted to commits@spark.apache.org by gu...@apache.org on 2019/03/20 12:28:43 UTC
[spark] branch master updated: [SPARK-27199][SQL] Replace TimeZone by ZoneId in TimestampFormatter API
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1882912 [SPARK-27199][SQL] Replace TimeZone by ZoneId in TimestampFormatter API
1882912 is described below
commit 1882912cca4921d3d8c8632b3bb34e69e8119791
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Wed Mar 20 21:28:11 2019 +0900
[SPARK-27199][SQL] Replace TimeZone by ZoneId in TimestampFormatter API
## What changes were proposed in this pull request?
In the PR, I propose to use `ZoneId` instead of `TimeZone` in:
- the `apply` and `getFractionFormatter` methods of the `TimestampFormatter` object,
- implementations of the `TimestampFormatter` trait such as `FractionTimestampFormatter`.
The motivation for the changes is to avoid an unnecessary conversion from `TimeZone` to `ZoneId`: the `TimestampFormatter` implementations use `ZoneId` internally, and the conversion goes through `String`, which is not free. Since `TimeZone` instances are themselves created from `String` in some cases, the worst case looks like `String` -> `TimeZone` -> `String` -> `ZoneId`. The PR eliminates these unneeded conversions.
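For illustration only, here is a minimal standalone sketch of the two paths using plain `java.time`/`java.util` APIs (not Spark code; the zone ID is an arbitrary example):

```scala
import java.time.ZoneId
import java.util.TimeZone

val zoneString = "Asia/Hong_Kong" // arbitrary example zone ID

// Worst case before: the String becomes a TimeZone, and the formatter
// then needs a ZoneId again; TimeZone.toZoneId resolves via getID(),
// i.e. through a String once more.
val asTimeZone: TimeZone = TimeZone.getTimeZone(ZoneId.of(zoneString, ZoneId.SHORT_IDS))
val roundTripped: ZoneId = asTimeZone.toZoneId

// After: resolve the String to a ZoneId once and pass it around directly,
// as the new DateTimeUtils.getZoneId and the changed TimestampFormatter
// factory methods in this patch do.
val direct: ZoneId = ZoneId.of(zoneString, ZoneId.SHORT_IDS)
```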
## How was this patch tested?
It was tested by `DateExpressionsSuite`, `DateTimeUtilsSuite` and `TimestampFormatterSuite`.
Closes #24141 from MaxGekk/zone-id.
Authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../spark/sql/catalyst/catalog/interface.scala | 3 +-
.../spark/sql/catalyst/csv/CSVInferSchema.scala | 2 +-
.../apache/spark/sql/catalyst/csv/CSVOptions.scala | 5 +--
.../sql/catalyst/csv/UnivocityGenerator.scala | 2 +-
.../spark/sql/catalyst/csv/UnivocityParser.scala | 2 +-
.../spark/sql/catalyst/expressions/Cast.scala | 2 +-
.../catalyst/expressions/datetimeExpressions.scala | 21 ++++++------
.../spark/sql/catalyst/json/JSONOptions.scala | 5 +--
.../spark/sql/catalyst/json/JacksonGenerator.scala | 2 +-
.../spark/sql/catalyst/json/JacksonParser.scala | 2 +-
.../spark/sql/catalyst/json/JsonInferSchema.scala | 2 +-
.../spark/sql/catalyst/util/DateTimeUtils.scala | 4 +--
.../sql/catalyst/util/TimestampFormatter.scala | 31 +++++++++---------
.../sql/catalyst/csv/UnivocityParserSuite.scala | 8 +++--
.../expressions/DateExpressionsSuite.scala | 3 +-
.../sql/catalyst/util/DateTimeTestUtils.scala | 3 +-
.../sql/catalyst/util/DateTimeUtilsSuite.scala | 2 +-
.../spark/sql/util/TimestampFormatterSuite.scala | 31 ++++++++----------
.../apache/spark/sql/execution/HiveResult.scala | 2 +-
.../execution/datasources/PartitioningUtils.scala | 37 +++++++++++-----------
.../execution/datasources/jdbc/JDBCRelation.scala | 2 +-
.../parquet/ParquetPartitionDiscoverySuite.scala | 21 ++++++------
22 files changed, 99 insertions(+), 93 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 69b5cb4..6006637 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.catalyst.catalog
import java.net.URI
+import java.time.ZoneOffset
import java.util.Date
import scala.collection.mutable
@@ -477,7 +478,7 @@ object CatalogColumnStat extends Logging {
val VERSION = 2
private def getTimestampFormatter(): TimestampFormatter = {
- TimestampFormatter(format = "yyyy-MM-dd HH:mm:ss.SSSSSS", timeZone = DateTimeUtils.TimeZoneUTC)
+ TimestampFormatter(format = "yyyy-MM-dd HH:mm:ss.SSSSSS", zoneId = ZoneOffset.UTC)
}
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
index 4dd4104..ae9f057 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
@@ -29,7 +29,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
private val timestampParser = TimestampFormatter(
options.timestampFormat,
- options.timeZone,
+ options.zoneId,
options.locale)
private val decimalParser = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index 90c96d1..1268fcf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -18,7 +18,8 @@
package org.apache.spark.sql.catalyst.csv
import java.nio.charset.StandardCharsets
-import java.util.{Locale, TimeZone}
+import java.time.ZoneId
+import java.util.Locale
import com.univocity.parsers.csv.{CsvParserSettings, CsvWriterSettings, UnescapedQuoteHandling}
@@ -139,7 +140,7 @@ class CSVOptions(
name.map(CompressionCodecs.getCodecClassName)
}
- val timeZone: TimeZone = DateTimeUtils.getTimeZone(
+ val zoneId: ZoneId = DateTimeUtils.getZoneId(
parameters.getOrElse(DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId))
// A language tag in IETF BCP 47 format
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala
index f012d96..9ca9450 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala
@@ -43,7 +43,7 @@ class UnivocityGenerator(
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
- options.timeZone,
+ options.zoneId,
options.locale)
private val dateFormatter = DateFormatter(options.dateFormat, options.locale)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index 79dff6f..b26044e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -76,7 +76,7 @@ class UnivocityParser(
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
- options.timeZone,
+ options.zoneId,
options.locale)
private val dateFormatter = DateFormatter(options.dateFormat, options.locale)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index c238ccb..a70ed6d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -233,7 +233,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
@inline private[this] def buildCast[T](a: Any, func: T => Any): Any = func(a.asInstanceOf[T])
private lazy val dateFormatter = DateFormatter()
- private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(timeZone)
+ private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)
// UDFToString
private[this] def castToString(from: DataType): Any => Any = from match {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 7aa1e70..7878a87 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.expressions
import java.sql.Timestamp
-import java.time.{Instant, LocalDate}
+import java.time.{Instant, LocalDate, ZoneId}
import java.time.temporal.IsoFields
import java.util.{Locale, TimeZone}
@@ -49,6 +49,7 @@ trait TimeZoneAwareExpression extends Expression {
def withTimeZone(timeZoneId: String): TimeZoneAwareExpression
@transient lazy val timeZone: TimeZone = DateTimeUtils.getTimeZone(timeZoneId.get)
+ @transient lazy val zoneId: ZoneId = DateTimeUtils.getZoneId(timeZoneId.get)
}
/**
@@ -532,16 +533,16 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
copy(timeZoneId = Option(timeZoneId))
override protected def nullSafeEval(timestamp: Any, format: Any): Any = {
- val df = TimestampFormatter(format.toString, timeZone)
+ val df = TimestampFormatter(format.toString, zoneId)
UTF8String.fromString(df.format(timestamp.asInstanceOf[Long]))
}
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
- val tz = ctx.addReferenceObj("timeZone", timeZone)
+ val zid = ctx.addReferenceObj("zoneId", zoneId, "java.time.ZoneId")
val locale = ctx.addReferenceObj("locale", Locale.US)
defineCodeGen(ctx, ev, (timestamp, format) => {
- s"""UTF8String.fromString($tf$$.MODULE$$.apply($format.toString(), $tz, $locale)
+ s"""UTF8String.fromString($tf$$.MODULE$$.apply($format.toString(), $zid, $locale)
.format($timestamp))"""
})
}
@@ -635,7 +636,7 @@ abstract class UnixTime
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val formatter: TimestampFormatter =
try {
- TimestampFormatter(constFormat.toString, timeZone)
+ TimestampFormatter(constFormat.toString, zoneId)
} catch {
case NonFatal(_) => null
}
@@ -668,7 +669,7 @@ abstract class UnixTime
} else {
val formatString = f.asInstanceOf[UTF8String].toString
try {
- TimestampFormatter(formatString, timeZone).parse(
+ TimestampFormatter(formatString, zoneId).parse(
t.asInstanceOf[UTF8String].toString) / MICROS_PER_SECOND
} catch {
case NonFatal(_) => null
@@ -707,7 +708,7 @@ abstract class UnixTime
}""")
}
case StringType =>
- val tz = ctx.addReferenceObj("timeZone", timeZone)
+ val tz = ctx.addReferenceObj("zoneId", zoneId)
val locale = ctx.addReferenceObj("locale", Locale.US)
val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
nullSafeCodeGen(ctx, ev, (string, format) => {
@@ -789,7 +790,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val formatter: TimestampFormatter =
try {
- TimestampFormatter(constFormat.toString, timeZone)
+ TimestampFormatter(constFormat.toString, zoneId)
} catch {
case NonFatal(_) => null
}
@@ -815,7 +816,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
null
} else {
try {
- UTF8String.fromString(TimestampFormatter(f.toString, timeZone)
+ UTF8String.fromString(TimestampFormatter(f.toString, zoneId)
.format(time.asInstanceOf[Long] * MICROS_PER_SECOND))
} catch {
case NonFatal(_) => null
@@ -846,7 +847,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
}""")
}
} else {
- val tz = ctx.addReferenceObj("timeZone", timeZone)
+ val tz = ctx.addReferenceObj("zoneId", zoneId)
val locale = ctx.addReferenceObj("locale", Locale.US)
val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
nullSafeCodeGen(ctx, ev, (seconds, f) => {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 1ec9d50..788eb00 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -18,7 +18,8 @@
package org.apache.spark.sql.catalyst.json
import java.nio.charset.{Charset, StandardCharsets}
-import java.util.{Locale, TimeZone}
+import java.time.ZoneId
+import java.util.Locale
import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
@@ -78,7 +79,7 @@ private[sql] class JSONOptions(
// A language tag in IETF BCP 47 format
val locale: Locale = parameters.get("locale").map(Locale.forLanguageTag).getOrElse(Locale.US)
- val timeZone: TimeZone = DateTimeUtils.getTimeZone(
+ val zoneId: ZoneId = DateTimeUtils.getZoneId(
parameters.getOrElse(DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId))
val dateFormat: String = parameters.getOrElse("dateFormat", "yyyy-MM-dd")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
index 951f519..3378040 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
@@ -79,7 +79,7 @@ private[sql] class JacksonGenerator(
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
- options.timeZone,
+ options.zoneId,
options.locale)
private val dateFormatter = DateFormatter(options.dateFormat, options.locale)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
index 8cf758e..19bc5bf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
@@ -57,7 +57,7 @@ class JacksonParser(
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
- options.timeZone,
+ options.zoneId,
options.locale)
private val dateFormatter = DateFormatter(options.dateFormat, options.locale)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
index 1fb4594..c5a97c7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
@@ -39,7 +39,7 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable {
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
- options.timeZone,
+ options.zoneId,
options.locale)
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 627ee14..45d2406 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -70,9 +70,9 @@ object DateTimeUtils {
def defaultTimeZone(): TimeZone = TimeZone.getDefault()
+ def getZoneId(timeZoneId: String): ZoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS)
def getTimeZone(timeZoneId: String): TimeZone = {
- val zoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS)
- TimeZone.getTimeZone(zoneId)
+ TimeZone.getTimeZone(getZoneId(timeZoneId))
}
// we should use the exact day as Int, for example, (year, month, day) -> day
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index c079691..f2a1a95 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -44,17 +44,16 @@ sealed trait TimestampFormatter extends Serializable {
class Iso8601TimestampFormatter(
pattern: String,
- timeZone: TimeZone,
+ zoneId: ZoneId,
locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper {
@transient
protected lazy val formatter = getOrCreateFormatter(pattern, locale)
- private val timeZoneId = timeZone.toZoneId
override def parse(s: String): Long = {
val parsed = formatter.parse(s)
val parsedZoneId = parsed.query(TemporalQueries.zone())
- val zoneId = if (parsedZoneId == null) timeZoneId else parsedZoneId
- val zonedDateTime = toZonedDateTime(parsed, zoneId)
+ val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
+ val zonedDateTime = toZonedDateTime(parsed, timeZoneId)
val epochSeconds = zonedDateTime.toEpochSecond
val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
@@ -63,7 +62,7 @@ class Iso8601TimestampFormatter(
override def format(us: Long): String = {
val instant = DateTimeUtils.microsToInstant(us)
- formatter.withZone(timeZoneId).format(instant)
+ formatter.withZone(zoneId).format(instant)
}
}
@@ -73,10 +72,10 @@ class Iso8601TimestampFormatter(
* output trailing zeros in the fraction. For example, the timestamp `2019-03-05 15:00:01.123400` is
* formatted as the string `2019-03-05 15:00:01.1234`.
*
- * @param timeZone the time zone in which the formatter parses or format timestamps
+ * @param zoneId the time zone identifier in which the formatter parses or format timestamps
*/
-class FractionTimestampFormatter(timeZone: TimeZone)
- extends Iso8601TimestampFormatter("", timeZone, TimestampFormatter.defaultLocale) {
+class FractionTimestampFormatter(zoneId: ZoneId)
+ extends Iso8601TimestampFormatter("", zoneId, TimestampFormatter.defaultLocale) {
@transient
override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter
@@ -86,19 +85,19 @@ object TimestampFormatter {
val defaultPattern: String = "yyyy-MM-dd HH:mm:ss"
val defaultLocale: Locale = Locale.US
- def apply(format: String, timeZone: TimeZone, locale: Locale): TimestampFormatter = {
- new Iso8601TimestampFormatter(format, timeZone, locale)
+ def apply(format: String, zoneId: ZoneId, locale: Locale): TimestampFormatter = {
+ new Iso8601TimestampFormatter(format, zoneId, locale)
}
- def apply(format: String, timeZone: TimeZone): TimestampFormatter = {
- apply(format, timeZone, defaultLocale)
+ def apply(format: String, zoneId: ZoneId): TimestampFormatter = {
+ apply(format, zoneId, defaultLocale)
}
- def apply(timeZone: TimeZone): TimestampFormatter = {
- apply(defaultPattern, timeZone, defaultLocale)
+ def apply(zoneId: ZoneId): TimestampFormatter = {
+ apply(defaultPattern, zoneId, defaultLocale)
}
- def getFractionFormatter(timeZone: TimeZone): TimestampFormatter = {
- new FractionTimestampFormatter(timeZone)
+ def getFractionFormatter(zoneId: ZoneId): TimestampFormatter = {
+ new FractionTimestampFormatter(zoneId)
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
index 4ae61bc..986de12 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
@@ -116,7 +116,9 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
parser = new UnivocityParser(StructType(Seq.empty), timestampsOptions)
val customTimestamp = "31/01/2015 00:00"
var format = FastDateFormat.getInstance(
- timestampsOptions.timestampFormat, timestampsOptions.timeZone, timestampsOptions.locale)
+ timestampsOptions.timestampFormat,
+ TimeZone.getTimeZone(timestampsOptions.zoneId),
+ timestampsOptions.locale)
val expectedTime = format.parse(customTimestamp).getTime
val castedTimestamp = parser.makeConverter("_1", TimestampType, nullable = true)
.apply(customTimestamp)
@@ -126,7 +128,9 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {
val dateOptions = new CSVOptions(Map("dateFormat" -> "dd/MM/yyyy"), false, "GMT")
parser = new UnivocityParser(StructType(Seq.empty), dateOptions)
format = FastDateFormat.getInstance(
- dateOptions.dateFormat, dateOptions.timeZone, dateOptions.locale)
+ dateOptions.dateFormat,
+ TimeZone.getTimeZone(dateOptions.zoneId),
+ dateOptions.locale)
val expectedDate = format.parse(customDate).getTime
val castedDate = parser.makeConverter("_1", DateType, nullable = true)
.apply(customDate)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 62d194f..61ee8f0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
import java.sql.{Date, Timestamp}
import java.text.SimpleDateFormat
+import java.time.ZoneOffset
import java.util.{Calendar, Locale, TimeZone}
import java.util.concurrent.TimeUnit
import java.util.concurrent.TimeUnit._
@@ -43,7 +44,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
val jstId = Option(TimeZoneJST.getID)
def toMillis(timestamp: String): Long = {
- val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", TimeZoneGMT)
+ val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", ZoneOffset.UTC)
TimeUnit.MICROSECONDS.toMillis(tf.parse(timestamp))
}
val date = "2015-04-08 13:10:15"
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala
index c35ab2b..4dfeb85 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.catalyst.util
-import java.time.{LocalDate, LocalDateTime, LocalTime}
+import java.time.{LocalDate, LocalDateTime, LocalTime, ZoneId}
import java.util.TimeZone
import java.util.concurrent.TimeUnit
@@ -40,6 +40,7 @@ object DateTimeTestUtils {
"Asia/Hong_Kong",
"Europe/Amsterdam")
val outstandingTimezones: Seq[TimeZone] = outstandingTimezonesIds.map(TimeZone.getTimeZone)
+ val outstandingZoneIds: Seq[ZoneId] = outstandingTimezonesIds.map(DateTimeUtils.getZoneId)
def withDefaultTimeZone[T](newDefaultTimeZone: TimeZone)(block: => T): T = {
val originalDefaultTimeZone = TimeZone.getDefault
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 39eb7d1..464d0ab 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -33,7 +33,7 @@ class DateTimeUtilsSuite extends SparkFunSuite {
private def defaultTz = DateTimeUtils.defaultTimeZone()
test("nanoseconds truncation") {
- val tf = TimestampFormatter.getFractionFormatter(DateTimeUtils.defaultTimeZone())
+ val tf = TimestampFormatter.getFractionFormatter(DateTimeUtils.defaultTimeZone.toZoneId)
def checkStringToTimestamp(originalTime: String, expectedParsedTime: String) {
val parsedTimestampOp = DateTimeUtils.stringToTimestamp(
UTF8String.fromString(originalTime), defaultTz)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
index 1675b61..d10c30c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
@@ -17,13 +17,12 @@
package org.apache.spark.sql.util
-import java.time.{LocalDateTime, ZoneOffset}
-import java.util.TimeZone
+import java.time.{LocalDateTime, ZoneId, ZoneOffset}
import java.util.concurrent.TimeUnit
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.SQLHelper
-import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, TimestampFormatter}
class TimestampFormatterSuite extends SparkFunSuite with SQLHelper {
@@ -38,12 +37,12 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper {
"Antarctica/Vostok" -> 1543723872001234L,
"Asia/Hong_Kong" -> 1543716672001234L,
"Europe/Amsterdam" -> 1543741872001234L)
- DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
+ DateTimeTestUtils.outstandingTimezonesIds.foreach { zoneId =>
val formatter = TimestampFormatter(
"yyyy-MM-dd'T'HH:mm:ss.SSSSSS",
- TimeZone.getTimeZone(timeZone))
+ DateTimeUtils.getZoneId(zoneId))
val microsSinceEpoch = formatter.parse(localDate)
- assert(microsSinceEpoch === expectedMicros(timeZone))
+ assert(microsSinceEpoch === expectedMicros(zoneId))
}
}
@@ -58,12 +57,12 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper {
"Antarctica/Vostok" -> "2018-12-02T16:11:12.001234",
"Asia/Hong_Kong" -> "2018-12-02T18:11:12.001234",
"Europe/Amsterdam" -> "2018-12-02T11:11:12.001234")
- DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
+ DateTimeTestUtils.outstandingTimezonesIds.foreach { zoneId =>
val formatter = TimestampFormatter(
"yyyy-MM-dd'T'HH:mm:ss.SSSSSS",
- TimeZone.getTimeZone(timeZone))
+ DateTimeUtils.getZoneId(zoneId))
val timestamp = formatter.format(microsSinceEpoch)
- assert(timestamp === expectedTimestamp(timeZone))
+ assert(timestamp === expectedTimestamp(zoneId))
}
}
@@ -79,8 +78,8 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper {
1543749753123456L,
2177456523456789L,
11858049903010203L).foreach { micros =>
- DateTimeTestUtils.outstandingTimezones.foreach { timeZone =>
- val formatter = TimestampFormatter(pattern, timeZone)
+ DateTimeTestUtils.outstandingZoneIds.foreach { zoneId =>
+ val formatter = TimestampFormatter(pattern, zoneId)
val timestamp = formatter.format(micros)
val parsed = formatter.parse(timestamp)
assert(micros === parsed)
@@ -100,8 +99,8 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper {
"2018-12-02T11:22:33.123456",
"2039-01-01T01:02:03.456789",
"2345-10-07T22:45:03.010203").foreach { timestamp =>
- DateTimeTestUtils.outstandingTimezones.foreach { timeZone =>
- val formatter = TimestampFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSS", timeZone)
+ DateTimeTestUtils.outstandingZoneIds.foreach { zoneId =>
+ val formatter = TimestampFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSS", zoneId)
val micros = formatter.parse(timestamp)
val formatted = formatter.format(micros)
assert(timestamp === formatted)
@@ -110,16 +109,14 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper {
}
test(" case insensitive parsing of am and pm") {
- val formatter = TimestampFormatter(
- "yyyy MMM dd hh:mm:ss a",
- TimeZone.getTimeZone("UTC"))
+ val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", ZoneOffset.UTC)
val micros = formatter.parse("2009 Mar 20 11:30:01 am")
assert(micros === TimeUnit.SECONDS.toMicros(
LocalDateTime.of(2009, 3, 20, 11, 30, 1).toEpochSecond(ZoneOffset.UTC)))
}
test("format fraction of second") {
- val formatter = TimestampFormatter.getFractionFormatter(TimeZone.getTimeZone("UTC"))
+ val formatter = TimestampFormatter.getFractionFormatter(ZoneOffset.UTC)
assert(formatter.format(0) === "1970-01-01 00:00:00")
assert(formatter.format(1) === "1970-01-01 00:00:00.000001")
assert(formatter.format(1000) === "1970-01-01 00:00:00.001")
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
index 38ef72e..eec8d70 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
@@ -79,7 +79,7 @@ object HiveResult {
private lazy val dateFormatter = DateFormatter()
private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(
- DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone))
+ DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone))
/** Hive outputs fields of structs slightly differently than top level attributes. */
private def toHiveStructString(a: (Any, DataType)): String = a match {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 0625cfb..6f42423 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -19,7 +19,8 @@ package org.apache.spark.sql.execution.datasources
import java.lang.{Double => JDouble, Long => JLong}
import java.math.{BigDecimal => JBigDecimal}
-import java.util.{Locale, TimeZone}
+import java.time.ZoneId
+import java.util.Locale
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
@@ -100,7 +101,7 @@ object PartitioningUtils {
validatePartitionColumns: Boolean,
timeZoneId: String): PartitionSpec = {
parsePartitions(paths, typeInference, basePaths, userSpecifiedSchema, caseSensitive,
- validatePartitionColumns, DateTimeUtils.getTimeZone(timeZoneId))
+ validatePartitionColumns, DateTimeUtils.getZoneId(timeZoneId))
}
private[datasources] def parsePartitions(
@@ -110,7 +111,7 @@ object PartitioningUtils {
userSpecifiedSchema: Option[StructType],
caseSensitive: Boolean,
validatePartitionColumns: Boolean,
- timeZone: TimeZone): PartitionSpec = {
+ zoneId: ZoneId): PartitionSpec = {
val userSpecifiedDataTypes = if (userSpecifiedSchema.isDefined) {
val nameToDataType = userSpecifiedSchema.get.fields.map(f => f.name -> f.dataType).toMap
if (!caseSensitive) {
@@ -130,11 +131,11 @@ object PartitioningUtils {
}
val dateFormatter = DateFormatter()
- val timestampFormatter = TimestampFormatter(timestampPartitionPattern, timeZone)
+ val timestampFormatter = TimestampFormatter(timestampPartitionPattern, zoneId)
// First, we need to parse every partition's path and see if we can find partition values.
val (partitionValues, optDiscoveredBasePaths) = paths.map { path =>
parsePartition(path, typeInference, basePaths, userSpecifiedDataTypes,
- validatePartitionColumns, timeZone, dateFormatter, timestampFormatter)
+ validatePartitionColumns, zoneId, dateFormatter, timestampFormatter)
}.unzip
// We create pairs of (path -> path's partition value) here
@@ -169,7 +170,7 @@ object PartitioningUtils {
"please load them separately and then union them.")
val resolvedPartitionValues =
- resolvePartitions(pathsWithPartitionValues, caseSensitive, timeZone)
+ resolvePartitions(pathsWithPartitionValues, caseSensitive, zoneId)
// Creates the StructType which represents the partition columns.
val fields = {
@@ -219,7 +220,7 @@ object PartitioningUtils {
basePaths: Set[Path],
userSpecifiedDataTypes: Map[String, DataType],
validatePartitionColumns: Boolean,
- timeZone: TimeZone,
+ zoneId: ZoneId,
dateFormatter: DateFormatter,
timestampFormatter: TimestampFormatter): (Option[PartitionValues], Option[Path]) = {
val columns = ArrayBuffer.empty[(String, Literal)]
@@ -243,7 +244,7 @@ object PartitioningUtils {
// Once we get the string, we try to parse it and find the partition column and value.
val maybeColumn =
parsePartitionColumn(currentPath.getName, typeInference, userSpecifiedDataTypes,
- validatePartitionColumns, timeZone, dateFormatter, timestampFormatter)
+ validatePartitionColumns, zoneId, dateFormatter, timestampFormatter)
maybeColumn.foreach(columns += _)
// Now, we determine if we should stop.
@@ -278,7 +279,7 @@ object PartitioningUtils {
typeInference: Boolean,
userSpecifiedDataTypes: Map[String, DataType],
validatePartitionColumns: Boolean,
- timeZone: TimeZone,
+ zoneId: ZoneId,
dateFormatter: DateFormatter,
timestampFormatter: TimestampFormatter): Option[(String, Literal)] = {
val equalSignIndex = columnSpec.indexOf('=')
@@ -298,11 +299,11 @@ object PartitioningUtils {
val columnValueLiteral = inferPartitionColumnValue(
rawColumnValue,
false,
- timeZone,
+ zoneId,
dateFormatter,
timestampFormatter)
val columnValue = columnValueLiteral.eval()
- val castedValue = Cast(columnValueLiteral, dataType, Option(timeZone.getID)).eval()
+ val castedValue = Cast(columnValueLiteral, dataType, Option(zoneId.getId)).eval()
if (validatePartitionColumns && columnValue != null && castedValue == null) {
throw new RuntimeException(s"Failed to cast value `$columnValue` to `$dataType` " +
s"for partition column `$columnName`")
@@ -312,7 +313,7 @@ object PartitioningUtils {
inferPartitionColumnValue(
rawColumnValue,
typeInference,
- timeZone,
+ zoneId,
dateFormatter,
timestampFormatter)
}
@@ -384,7 +385,7 @@ object PartitioningUtils {
def resolvePartitions(
pathsWithPartitionValues: Seq[(Path, PartitionValues)],
caseSensitive: Boolean,
- timeZone: TimeZone): Seq[PartitionValues] = {
+ zoneId: ZoneId): Seq[PartitionValues] = {
if (pathsWithPartitionValues.isEmpty) {
Seq.empty
} else {
@@ -401,7 +402,7 @@ object PartitioningUtils {
val values = pathsWithPartitionValues.map(_._2)
val columnCount = values.head.columnNames.size
val resolvedValues = (0 until columnCount).map { i =>
- resolveTypeConflicts(values.map(_.literals(i)), timeZone)
+ resolveTypeConflicts(values.map(_.literals(i)), zoneId)
}
// Fills resolved literals back to each partition
@@ -467,7 +468,7 @@ object PartitioningUtils {
private[datasources] def inferPartitionColumnValue(
raw: String,
typeInference: Boolean,
- timeZone: TimeZone,
+ zoneId: ZoneId,
dateFormatter: DateFormatter,
timestampFormatter: TimestampFormatter): Literal = {
val decimalTry = Try {
@@ -503,7 +504,7 @@ object PartitioningUtils {
// TimestampType
timestampFormatter.parse(unescapedRaw)
// SPARK-23436: see comment for date
- val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(timeZone.getID)).eval()
+ val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(zoneId.getId)).eval()
// Disallow TimestampType if the cast returned null
require(timestampValue != null)
Literal.create(timestampValue, TimestampType)
@@ -607,12 +608,12 @@ object PartitioningUtils {
* Given a collection of [[Literal]]s, resolves possible type conflicts by
* [[findWiderTypeForPartitionColumn]].
*/
- private def resolveTypeConflicts(literals: Seq[Literal], timeZone: TimeZone): Seq[Literal] = {
+ private def resolveTypeConflicts(literals: Seq[Literal], zoneId: ZoneId): Seq[Literal] = {
val litTypes = literals.map(_.dataType)
val desiredType = litTypes.reduce(findWiderTypeForPartitionColumn)
literals.map { case l @ Literal(_, dataType) =>
- Literal.create(Cast(l, desiredType, Some(timeZone.getID)).eval(), desiredType)
+ Literal.create(Cast(l, desiredType, Some(zoneId.getId)).eval(), desiredType)
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
index 724a0f3..fe45d67 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
@@ -200,7 +200,7 @@ private[sql] object JDBCRelation extends Logging {
case DateType => DateFormatter().format(value.toInt)
case TimestampType =>
val timestampFormatter = TimestampFormatter.getFractionFormatter(
- DateTimeUtils.getTimeZone(timeZoneId))
+ DateTimeUtils.getZoneId(timeZoneId))
DateTimeUtils.timestampToString(timestampFormatter, value)
}
s"'$dateTimeStr'"
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
index 864c1e9..febbe05 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet
import java.io.File
import java.math.BigInteger
import java.sql.{Date, Timestamp}
+import java.time.{ZoneId, ZoneOffset}
import java.util.{Calendar, Locale, TimeZone}
import scala.collection.mutable.ArrayBuffer
@@ -36,7 +37,6 @@ import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, Timesta
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.execution.datasources.{PartitionPath => Partition}
import org.apache.spark.sql.execution.streaming.MemoryStream
-import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._
@@ -54,10 +54,9 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
val defaultPartitionName = ExternalCatalogUtils.DEFAULT_PARTITION_NAME
- val timeZone = TimeZone.getDefault()
- val timeZoneId = timeZone.getID
+ val timeZoneId = ZoneId.systemDefault()
val df = DateFormatter()
- val tf = TimestampFormatter(timestampPartitionPattern, timeZone)
+ val tf = TimestampFormatter(timestampPartitionPattern, timeZoneId)
protected override def beforeAll(): Unit = {
super.beforeAll()
@@ -70,8 +69,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
}
test("column type inference") {
- def check(raw: String, literal: Literal, timeZone: TimeZone = timeZone): Unit = {
- assert(inferPartitionColumnValue(raw, true, timeZone, df, tf) === literal)
+ def check(raw: String, literal: Literal, zoneId: ZoneId = timeZoneId): Unit = {
+ assert(inferPartitionColumnValue(raw, true, zoneId, df, tf) === literal)
}
check("10", Literal.create(10, IntegerType))
@@ -90,7 +89,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
c.set(Calendar.MILLISECOND, 0)
check("1990-02-24 12:00:30",
Literal.create(new Timestamp(c.getTimeInMillis), TimestampType),
- TimeZone.getTimeZone("GMT"))
+ ZoneOffset.UTC)
check(defaultPartitionName, Literal.create(null, NullType))
}
@@ -199,13 +198,13 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
test("parse partition") {
def check(path: String, expected: Option[PartitionValues]): Unit = {
val actual = parsePartition(new Path(path), true, Set.empty[Path],
- Map.empty, true, timeZone, df, tf)._1
+ Map.empty, true, timeZoneId, df, tf)._1
assert(expected === actual)
}
def checkThrows[T <: Throwable: Manifest](path: String, expected: String): Unit = {
val message = intercept[T] {
- parsePartition(new Path(path), true, Set.empty[Path], Map.empty, true, timeZone, df, tf)
+ parsePartition(new Path(path), true, Set.empty[Path], Map.empty, true, timeZoneId, df, tf)
}.getMessage
assert(message.contains(expected))
@@ -251,7 +250,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
basePaths = Set(new Path("file://path/a=10")),
Map.empty,
true,
- timeZone = timeZone,
+ zoneId = timeZoneId,
df,
tf)._1
@@ -264,7 +263,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
basePaths = Set(new Path("file://path")),
Map.empty,
true,
- timeZone = timeZone,
+ zoneId = timeZoneId,
df,
tf)._1