You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/09 15:58:24 UTC
[spark] branch master updated: [SPARK-29295][SQL][FOLLOWUP] Dynamic
partition map parsed from partition path should be case insensitive
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 717ec5e [SPARK-29295][SQL][FOLLOWUP] Dynamic partition map parsed from partition path should be case insensitive
717ec5e is described below
commit 717ec5e9e3275e5bfdd7743fc62f90eff4843655
Author: turbofei <fw...@ebay.com>
AuthorDate: Tue Jun 9 15:57:18 2020 +0000
[SPARK-29295][SQL][FOLLOWUP] Dynamic partition map parsed from partition path should be case insensitive
### What changes were proposed in this pull request?
This is a follow up of https://github.com/apache/spark/pull/25979.
When we run INSERT OVERWRITE on an external Hive partitioned table with an upper-case dynamic partition key, an exception is thrown.
For example:
```
org.apache.spark.SparkException: Dynamic partition key P1 is not among written partition paths.
```
The root cause is that the Hive metastore is not case-preserving and stores partition column names in lower case; see details in:
https://github.com/apache/spark/blob/ddd8d5f5a0b6db17babc201ba4b73f7df91df1a3/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala#L895-L901
https://github.com/apache/spark/blob/e28914095aa1fa7a4680b5e4fcf69e3ef64b3dbc/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala#L228-L234
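In this PR, we convert the dynamic partition map (parsed from the written partition paths) to a case-insensitive map, so that looking up a partition key succeeds regardless of its case.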
### Why are the changes needed?
To fix the failure that occurs when running INSERT OVERWRITE on an external Hive partitioned table with an upper-case dynamic partition key.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
UT.
Closes #28765 from turboFei/SPARK-29295-follow-up.
Authored-by: turbofei <fw...@ebay.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/hive/execution/InsertIntoHiveTable.scala | 6 +++++-
.../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 13 +++++++++++++
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index 9f83f2a..116217e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -29,6 +29,7 @@ import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.command.CommandUtils
import org.apache.spark.sql.hive.HiveExternalCatalog
@@ -225,9 +226,12 @@ case class InsertIntoHiveTable(
ExternalCatalogUtils.unescapePathName(splitPart(1))
}.toMap
+ val caseInsensitiveDpMap = CaseInsensitiveMap(dpMap)
+
val updatedPartitionSpec = partition.map {
case (key, Some(value)) => key -> value
- case (key, None) if dpMap.contains(key) => key -> dpMap(key)
+ case (key, None) if caseInsensitiveDpMap.contains(key) =>
+ key -> caseInsensitiveDpMap(key)
case (key, _) =>
throw new SparkException(s"Dynamic partition key $key is not among " +
"written partition paths.")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 79c6ade..d12eae0e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -2544,6 +2544,19 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
assert(e.getMessage.contains("Cannot modify the value of a static config"))
}
}
+
+ test("SPARK-29295: dynamic partition map parsed from partition path should be case insensitive") {
+ withTable("t") {
+ withSQLConf("hive.exec.dynamic.partition" -> "true",
+ "hive.exec.dynamic.partition.mode" -> "nonstrict") {
+ withTempDir { loc =>
+ sql(s"CREATE TABLE t(c1 INT) PARTITIONED BY(P1 STRING) LOCATION '${loc.getAbsolutePath}'")
+ sql("INSERT OVERWRITE TABLE t PARTITION(P1) VALUES(1, 'caseSensitive')")
+ checkAnswer(sql("select * from t"), Row(1, "caseSensitive"))
+ }
+ }
+ }
+ }
}
class SQLQuerySuite extends SQLQuerySuiteBase with DisableAdaptiveExecutionSuite
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org