Posted to commits@spark.apache.org by do...@apache.org on 2020/07/07 21:11:16 UTC
[spark] branch master updated: [SPARK-31710][SQL][FOLLOWUP] Allow cast numeric to timestamp by default
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1261fac [SPARK-31710][SQL][FOLLOWUP] Allow cast numeric to timestamp by default
1261fac is described below
commit 1261fac67457afab38bfad42e62d7bf93c661204
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Tue Jul 7 14:09:40 2020 -0700
[SPARK-31710][SQL][FOLLOWUP] Allow cast numeric to timestamp by default
### What changes were proposed in this pull request?
1. Set the SQL config `spark.sql.legacy.allowCastNumericToTimestamp` to `true` by default
2. Remove the explicit settings of `spark.sql.legacy.allowCastNumericToTimestamp` to `true` in the cast suites (see the sketch below).
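For illustration only (not part of the original commit message): a minimal spark-shell sketch of the behavior this default controls, assuming a Spark build with this change applied and the usual `spark` session.

```scala
// Hypothetical spark-shell sketch (assumes a SparkSession named `spark`).
// With the new default (true), a numeric-to-timestamp cast resolves and the
// value is interpreted as seconds since the epoch.
spark.conf.set("spark.sql.legacy.allowCastNumericToTimestamp", true)
spark.sql("SELECT CAST(1 AS TIMESTAMP)").show()  // 1970-01-01 00:00:01 in the session time zone

// With the flag off, the same cast does not resolve; the dedicated functions
// TIMESTAMP_SECONDS, TIMESTAMP_MILLIS and TIMESTAMP_MICROS remain the
// unambiguous alternative.
spark.conf.set("spark.sql.legacy.allowCastNumericToTimestamp", false)
// spark.sql("SELECT CAST(1 AS TIMESTAMP)")  // would fail analysis here
```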
### Why are the changes needed?
To avoid breaking changes in minor versions (in the upcoming Spark 3.1.0), in line with the semantic versioning guidelines (https://spark.apache.org/versioning-policy.html).
### Does this PR introduce _any_ user-facing change?
Yes. Casting numeric types to timestamp is allowed by default again; users no longer need to set `spark.sql.legacy.allowCastNumericToTimestamp` to `true`.
### How was this patch tested?
By `CastSuite`.
Closes #29012 from MaxGekk/allow-cast-numeric-to-timestamp.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
docs/sql-migration-guide.md | 2 -
.../org/apache/spark/sql/internal/SQLConf.scala | 2 +-
.../spark/sql/catalyst/expressions/CastSuite.scala | 84 ++++++++++------------
3 files changed, 38 insertions(+), 50 deletions(-)
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 0c84db3..d3138ae 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -30,8 +30,6 @@ license: |
- In Spark 3.1, `from_unixtime`, `unix_timestamp`,`to_unix_timestamp`, `to_timestamp` and `to_date` will fail if the specified datetime pattern is invalid. In Spark 3.0 or earlier, they result `NULL`.
- - In Spark 3.1, casting numeric to timestamp will be forbidden by default. It's strongly recommended to use dedicated functions: TIMESTAMP_SECONDS, TIMESTAMP_MILLIS and TIMESTAMP_MICROS. Or you can set `spark.sql.legacy.allowCastNumericToTimestamp` to true to work around it. See more details in SPARK-31710.
-
## Upgrading from Spark SQL 3.0 to 3.0.1
- In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 3149d14..31dd943 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2634,7 +2634,7 @@ object SQLConf {
"when false, forbid the cast, more details in SPARK-31710")
.version("3.1.0")
.booleanConf
- .createWithDefault(false)
+ .createWithDefault(true)
val COALESCE_BUCKETS_IN_SORT_MERGE_JOIN_ENABLED =
buildConf("spark.sql.bucketing.coalesceBucketsInSortMergeJoin.enabled")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 76ec450..4ab288a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -49,9 +49,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
}
protected def checkNullCast(from: DataType, to: DataType): Unit = {
- withSQLConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key -> "true") {
- checkEvaluation(cast(Literal.create(null, from), to, UTC_OPT), null)
- }
+ checkEvaluation(cast(Literal.create(null, from), to, UTC_OPT), null)
}
test("null cast") {
@@ -240,9 +238,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
checkCast(1.5, 1.5f)
checkCast(1.5, "1.5")
- withSQLConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key -> "true") {
- checkEvaluation(cast(cast(1.toDouble, TimestampType), DoubleType), 1.toDouble)
- }
+ checkEvaluation(cast(cast(1.toDouble, TimestampType), DoubleType), 1.toDouble)
}
test("cast from string") {
@@ -309,19 +305,17 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
cast(cast("5", ByteType), ShortType), IntegerType), FloatType), DoubleType), LongType),
5.toLong)
- withSQLConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key -> "true") {
- checkEvaluation(
- cast(cast(cast(cast(cast(cast("5", ByteType), TimestampType),
- DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType),
- 5.toShort)
- checkEvaluation(
- cast(cast(cast(cast(cast(cast("5", TimestampType, UTC_OPT), ByteType),
- DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType),
- null)
- checkEvaluation(cast(cast(cast(cast(cast(cast("5", DecimalType.SYSTEM_DEFAULT),
- ByteType), TimestampType), LongType), StringType), ShortType),
- 5.toShort)
- }
+ checkEvaluation(
+ cast(cast(cast(cast(cast(cast("5", ByteType), TimestampType),
+ DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType),
+ 5.toShort)
+ checkEvaluation(
+ cast(cast(cast(cast(cast(cast("5", TimestampType, UTC_OPT), ByteType),
+ DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType),
+ null)
+ checkEvaluation(cast(cast(cast(cast(cast(cast("5", DecimalType.SYSTEM_DEFAULT),
+ ByteType), TimestampType), LongType), StringType), ShortType),
+ 5.toShort)
checkEvaluation(cast("23", DoubleType), 23d)
checkEvaluation(cast("23", IntegerType), 23)
@@ -383,31 +377,29 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(cast(ts, FloatType), 15.003f)
checkEvaluation(cast(ts, DoubleType), 15.003)
- withSQLConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key -> "true") {
- checkEvaluation(cast(cast(tss, ShortType), TimestampType),
- fromJavaTimestamp(ts) * MILLIS_PER_SECOND)
- checkEvaluation(cast(cast(tss, IntegerType), TimestampType),
- fromJavaTimestamp(ts) * MILLIS_PER_SECOND)
- checkEvaluation(cast(cast(tss, LongType), TimestampType),
- fromJavaTimestamp(ts) * MILLIS_PER_SECOND)
- checkEvaluation(
- cast(cast(millis.toFloat / MILLIS_PER_SECOND, TimestampType), FloatType),
- millis.toFloat / MILLIS_PER_SECOND)
- checkEvaluation(
- cast(cast(millis.toDouble / MILLIS_PER_SECOND, TimestampType), DoubleType),
- millis.toDouble / MILLIS_PER_SECOND)
- checkEvaluation(
- cast(cast(Decimal(1), TimestampType), DecimalType.SYSTEM_DEFAULT),
- Decimal(1))
+ checkEvaluation(cast(cast(tss, ShortType), TimestampType),
+ fromJavaTimestamp(ts) * MILLIS_PER_SECOND)
+ checkEvaluation(cast(cast(tss, IntegerType), TimestampType),
+ fromJavaTimestamp(ts) * MILLIS_PER_SECOND)
+ checkEvaluation(cast(cast(tss, LongType), TimestampType),
+ fromJavaTimestamp(ts) * MILLIS_PER_SECOND)
+ checkEvaluation(
+ cast(cast(millis.toFloat / MILLIS_PER_SECOND, TimestampType), FloatType),
+ millis.toFloat / MILLIS_PER_SECOND)
+ checkEvaluation(
+ cast(cast(millis.toDouble / MILLIS_PER_SECOND, TimestampType), DoubleType),
+ millis.toDouble / MILLIS_PER_SECOND)
+ checkEvaluation(
+ cast(cast(Decimal(1), TimestampType), DecimalType.SYSTEM_DEFAULT),
+ Decimal(1))
- // A test for higher precision than millis
- checkEvaluation(cast(cast(0.000001, TimestampType), DoubleType), 0.000001)
+ // A test for higher precision than millis
+ checkEvaluation(cast(cast(0.000001, TimestampType), DoubleType), 0.000001)
- checkEvaluation(cast(Double.NaN, TimestampType), null)
- checkEvaluation(cast(1.0 / 0.0, TimestampType), null)
- checkEvaluation(cast(Float.NaN, TimestampType), null)
- checkEvaluation(cast(1.0f / 0.0f, TimestampType), null)
- }
+ checkEvaluation(cast(Double.NaN, TimestampType), null)
+ checkEvaluation(cast(1.0 / 0.0, TimestampType), null)
+ checkEvaluation(cast(Float.NaN, TimestampType), null)
+ checkEvaluation(cast(1.0f / 0.0f, TimestampType), null)
}
test("cast from array") {
@@ -1036,10 +1028,8 @@ class CastSuite extends CastSuiteBase {
test("cast from int 2") {
checkEvaluation(cast(1, LongType), 1.toLong)
- withSQLConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key -> "true") {
- checkEvaluation(cast(cast(1000, TimestampType), LongType), 1000.toLong)
- checkEvaluation(cast(cast(-1200, TimestampType), LongType), -1200.toLong)
- }
+ checkEvaluation(cast(cast(1000, TimestampType), LongType), 1000.toLong)
+ checkEvaluation(cast(cast(-1200, TimestampType), LongType), -1200.toLong)
checkEvaluation(cast(123, DecimalType.USER_DEFAULT), Decimal(123))
checkEvaluation(cast(123, DecimalType(3, 0)), Decimal(123))
@@ -1323,7 +1313,7 @@ class CastSuite extends CastSuiteBase {
}
}
- test("SPARK-31710:fail casting from numeric to timestamp by default") {
+ test("SPARK-31710: fail casting from numeric to timestamp if it is forbidden") {
Seq(true, false).foreach { enable =>
withSQLConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key -> enable.toString) {
assert(cast(2.toByte, TimestampType).resolved == enable)
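Not part of the commit, added for context: the migration-guide note removed above recommends the dedicated conversion functions introduced under SPARK-31710. A rough spark-shell sketch of those functions (assuming a `spark` session; the example epoch values are arbitrary):

```scala
// Hypothetical sketch of the dedicated functions; each makes the unit of the
// numeric argument explicit instead of relying on CAST semantics.
spark.sql("SELECT TIMESTAMP_SECONDS(1230219000)").show()        // seconds since the epoch
spark.sql("SELECT TIMESTAMP_MILLIS(1230219000123L)").show()     // milliseconds since the epoch
spark.sql("SELECT TIMESTAMP_MICROS(1230219000123123L)").show()  // microseconds since the epoch
```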