You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2019/03/09 13:28:33 UTC
[spark] branch master updated: [SPARK-27080][SQL] bug fix: mergeWithMetastoreSchema with uniform lower case comparison

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a29df5f  [SPARK-27080][SQL] bug fix: mergeWithMetastoreSchema with uniform lower case comparison
a29df5f is described below

commit a29df5fa02111f57965be2ab5e208f5c815265fe
Author: CodeGod <>
AuthorDate: Sat Mar 9 21:28:10 2019 +0800

    [SPARK-27080][SQL] bug fix: mergeWithMetastoreSchema with uniform lower case comparison
    
    ## What changes were proposed in this pull request?
    When reading a Parquet file while merging the metastore schema with the file schema, field names should be compared using a uniform case. The current implementation lowercases the names everywhere except in one lookup, and this patch fixes that omission.
    
    ## How was this patch tested?
    Unit test
    
    Closes #24001 from codeborui/mergeSchemaBugFix.
    
    Authored-by: CodeGod <>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../spark/sql/hive/HiveMetastoreCatalog.scala      |  2 +-
 .../spark/sql/hive/HiveSchemaInferenceSuite.scala  | 26 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 03f4b8d..d6b2945 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -326,8 +326,8 @@ private[hive] object HiveMetastoreCatalog {
     // Merge missing nullable fields to inferred schema and build a case-insensitive field map.
     val inferredFields = StructType(inferredSchema ++ missingNullables)
       .map(f => f.name.toLowerCase -> f).toMap
+    StructType(metastoreSchema.map(f => f.copy(name = inferredFields(f.name.toLowerCase).name)))
     // scalastyle:on caselocale
-    StructType(metastoreSchema.map(f => f.copy(name = inferredFields(f.name).name)))
   } catch {
     case NonFatal(_) =>
       val msg = s"""Detected conflicting schemas when merging the schema obtained from the Hive
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala
index aa4fc13..590ef94 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala
@@ -264,6 +264,32 @@ class HiveSchemaInferenceSuite
         StructType(Seq(StructField("lowerCase", BinaryType))))
     }
 
+    // Parquet schema is a subset of the metastore schema, and field names contain uppercase characters
+    assertResult(
+      StructType(Seq(
+        StructField("UPPERCase", DoubleType, nullable = true),
+        StructField("lowerCase", BinaryType, nullable = true)))) {
+
+      HiveMetastoreCatalog.mergeWithMetastoreSchema(
+        StructType(Seq(
+          StructField("UPPERCase", DoubleType, nullable = true),
+          StructField("lowerCase", BinaryType, nullable = true))),
+
+        StructType(Seq(
+          StructField("lowerCase", BinaryType, nullable = true))))
+    }
+
+    // Metastore schema contains an additional non-nullable field, so the merge must fail.
+    assert(intercept[Throwable] {
+      HiveMetastoreCatalog.mergeWithMetastoreSchema(
+        StructType(Seq(
+          StructField("UPPERCase", DoubleType, nullable = false),
+          StructField("lowerCase", BinaryType, nullable = true))),
+
+        StructType(Seq(
+          StructField("lowerCase", BinaryType, nullable = true))))
+    }.getMessage.contains("Detected conflicting schemas"))
+
     // Check that merging missing nullable fields works as expected.
     assertResult(
       StructType(Seq(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org