You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/02/03 00:29:56 UTC
[spark] branch branch-3.1 updated:
[SPARK-33591][3.1][SQL][FOLLOWUP] Add legacy config for recognizing null
partition spec values
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 18def59 [SPARK-33591][3.1][SQL][FOLLOWUP] Add legacy config for recognizing null partition spec values
18def59 is described below
commit 18def5955dbde1fdddfed78a691d9adc97cfe7d7
Author: Gengliang Wang <ge...@databricks.com>
AuthorDate: Wed Feb 3 09:29:35 2021 +0900
[SPARK-33591][3.1][SQL][FOLLOWUP] Add legacy config for recognizing null partition spec values
### What changes were proposed in this pull request?
This PR is to backport https://github.com/apache/spark/pull/31421 and https://github.com/apache/spark/pull/31434 to branch 3.1
This is a follow up for https://github.com/apache/spark/pull/30538.
It adds a legacy conf `spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` in case users want the legacy behavior.
It also adds documentation for the behavior change.
### Why are the changes needed?
In case users want the legacy behavior, they can set `spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` as true.
### Does this PR introduce _any_ user-facing change?
Yes, adding a legacy configuration to restore the old behavior.
### How was this patch tested?
Unit test.
Closes #31439 from gengliangwang/backportLegacyConf3.1.
Authored-by: Gengliang Wang <ge...@databricks.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
docs/sql-migration-guide.md | 2 ++
.../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 10 +++++++---
.../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 10 ++++++++++
.../scala/org/apache/spark/sql/execution/SparkSqlParser.scala | 2 +-
.../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 11 +++++++++++
5 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 2beddcb..36dccf9 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -70,6 +70,8 @@ license: |
* `ALTER TABLE .. ADD PARTITION` throws `PartitionsAlreadyExistException` if new partition exists already
* `ALTER TABLE .. DROP PARTITION` throws `NoSuchPartitionsException` for not existing partitions
+ - In Spark 3.0.2, `PARTITION(col=null)` is always parsed as a null literal in the partition spec. In Spark 3.0.1 or earlier, it is parsed as a string literal of its text representation, e.g., string "null", if the partition column is string type. To restore the legacy behavior, you can set `spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` as true.
+
## Upgrading from Spark SQL 3.0 to 3.0.1
- In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 34f56e9..c7ca4b5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -481,9 +481,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
*/
override def visitPartitionSpec(
ctx: PartitionSpecContext): Map[String, Option[String]] = withOrigin(ctx) {
+ val legacyNullAsString =
+ conf.getConf(SQLConf.LEGACY_PARSE_NULL_PARTITION_SPEC_AS_STRING_LITERAL)
val parts = ctx.partitionVal.asScala.map { pVal =>
val name = pVal.identifier.getText
- val value = Option(pVal.constant).map(visitStringConstant)
+ val value = Option(pVal.constant).map(v => visitStringConstant(v, legacyNullAsString))
name -> value
}
// Before calling `toMap`, we check duplicated keys to avoid silently ignore partition values
@@ -509,9 +511,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
* main purpose is to prevent slight differences due to back to back conversions i.e.:
* String -> Literal -> String.
*/
- protected def visitStringConstant(ctx: ConstantContext): String = withOrigin(ctx) {
+ protected def visitStringConstant(
+ ctx: ConstantContext,
+ legacyNullAsString: Boolean): String = withOrigin(ctx) {
ctx match {
- case _: NullLiteralContext => null
+ case _: NullLiteralContext if !legacyNullAsString => null
case s: StringLiteralContext => createString(s)
case o => o.getText
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index d5762a6..687f645 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2435,6 +2435,16 @@ object SQLConf {
.booleanConf
.createWithDefault(true)
+ val LEGACY_PARSE_NULL_PARTITION_SPEC_AS_STRING_LITERAL =
+ buildConf("spark.sql.legacy.parseNullPartitionSpecAsStringLiteral")
+ .internal()
+ .doc("If it is set to true, `PARTITION(col=null)` is parsed as a string literal of its " +
+ "text representation, e.g., string 'null', when the partition column is string type. " +
+ "Otherwise, it is always parsed as a null literal in the partition spec.")
+ .version("3.0.2")
+ .booleanConf
+ .createWithDefault(false)
+
val LEGACY_REPLACE_DATABRICKS_SPARK_AVRO_ENABLED =
buildConf("spark.sql.legacy.replaceDatabricksSparkAvro.enabled")
.internal()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 8ee3521..72dd007 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -353,7 +353,7 @@ class SparkSqlAstBuilder extends AstBuilder {
* Convert a constants list into a String sequence.
*/
override def visitConstantList(ctx: ConstantListContext): Seq[String] = withOrigin(ctx) {
- ctx.constant.asScala.map(visitStringConstant).toSeq
+ ctx.constant.asScala.map(v => visitStringConstant(v, legacyNullAsString = false)).toSeq
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 5ce236c..0808b80 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3734,6 +3734,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
}
}
+ test("SPARK-33591: null as string partition literal value 'null' after setting legacy conf") {
+ withSQLConf(SQLConf.LEGACY_PARSE_NULL_PARTITION_SPEC_AS_STRING_LITERAL.key -> "true") {
+ val t = "tbl"
+ withTable("tbl") {
+ sql(s"CREATE TABLE $t (col1 INT, p1 STRING) USING PARQUET PARTITIONED BY (p1)")
+ sql(s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0")
+ checkAnswer(spark.sql(s"SELECT * FROM $t"), Row(0, "null"))
+ }
+ }
+ }
+
test("SPARK-33593: Vector reader got incorrect data with binary partition value") {
Seq("false", "true").foreach(value => {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> value) {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org