You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/06/09 15:58:24 UTC

[spark] branch master updated: [SPARK-29295][SQL][FOLLOWUP] Dynamic partition map parsed from partition path should be case insensitive

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 717ec5e  [SPARK-29295][SQL][FOLLOWUP] Dynamic partition map parsed from partition path should be case insensitive
717ec5e is described below

commit 717ec5e9e3275e5bfdd7743fc62f90eff4843655
Author: turbofei <fw...@ebay.com>
AuthorDate: Tue Jun 9 15:57:18 2020 +0000

    [SPARK-29295][SQL][FOLLOWUP] Dynamic partition map parsed from partition path should be case insensitive
    
    ### What changes were proposed in this pull request?
    
    This is a follow up of https://github.com/apache/spark/pull/25979.
    When we run INSERT OVERWRITE on an external Hive partitioned table with an upper case dynamic partition key, an exception is thrown.
    
    like:
    ```
    org.apache.spark.SparkException: Dynamic partition key P1 is not among written partition paths.
    ```
    The root cause is that the Hive metastore is not case-preserving and stores partition column names in lower case; see details in:
    
    https://github.com/apache/spark/blob/ddd8d5f5a0b6db17babc201ba4b73f7df91df1a3/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala#L895-L901
    https://github.com/apache/spark/blob/e28914095aa1fa7a4680b5e4fcf69e3ef64b3dbc/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala#L228-L234
    
    In this PR, we convert the dynamic partition map to a case insensitive map.
    ### Why are the changes needed?
    
    To fix the failure that occurs when running INSERT OVERWRITE on an external Hive partitioned table with an upper case dynamic partition key.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    UT.
    
    Closes #28765 from turboFei/SPARK-29295-follow-up.
    
    Authored-by: turbofei <fw...@ebay.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/hive/execution/InsertIntoHiveTable.scala      |  6 +++++-
 .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 13 +++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index 9f83f2a..116217e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -29,6 +29,7 @@ import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.command.CommandUtils
 import org.apache.spark.sql.hive.HiveExternalCatalog
@@ -225,9 +226,12 @@ case class InsertIntoHiveTable(
                 ExternalCatalogUtils.unescapePathName(splitPart(1))
             }.toMap
 
+            val caseInsensitiveDpMap = CaseInsensitiveMap(dpMap)
+
             val updatedPartitionSpec = partition.map {
               case (key, Some(value)) => key -> value
-              case (key, None) if dpMap.contains(key) => key -> dpMap(key)
+              case (key, None) if caseInsensitiveDpMap.contains(key) =>
+                key -> caseInsensitiveDpMap(key)
               case (key, _) =>
                 throw new SparkException(s"Dynamic partition key $key is not among " +
                   "written partition paths.")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 79c6ade..d12eae0e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -2544,6 +2544,19 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
       assert(e.getMessage.contains("Cannot modify the value of a static config"))
     }
   }
+
+  test("SPARK-29295: dynamic partition map parsed from partition path should be case insensitive") {
+    withTable("t") {
+      withSQLConf("hive.exec.dynamic.partition" -> "true",
+        "hive.exec.dynamic.partition.mode" -> "nonstrict") {
+        withTempDir { loc =>
+          sql(s"CREATE TABLE t(c1 INT) PARTITIONED BY(P1 STRING) LOCATION '${loc.getAbsolutePath}'")
+          sql("INSERT OVERWRITE TABLE t PARTITION(P1) VALUES(1, 'caseSensitive')")
+          checkAnswer(sql("select * from t"), Row(1, "caseSensitive"))
+        }
+      }
+    }
+  }
 }
 
 class SQLQuerySuite extends SQLQuerySuiteBase with DisableAdaptiveExecutionSuite


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org