You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Wenchen Fan (JIRA)" <ji...@apache.org> on 2019/03/09 13:35:00 UTC
[jira] [Resolved] (SPARK-27080) Read parquet file with merging
metastore schema should compare schema field in uniform case.
[ https://issues.apache.org/jira/browse/SPARK-27080?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Wenchen Fan resolved SPARK-27080.
---------------------------------
Resolution: Fixed
Fix Version/s: 2.3.4, 2.4.1, 3.0.0
Issue resolved by pull request 24001
[https://github.com/apache/spark/pull/24001]
> Read parquet file with merging metastore schema should compare schema field in uniform case.
> --------------------------------------------------------------------------------------------
>
> Key: SPARK-27080
> URL: https://issues.apache.org/jira/browse/SPARK-27080
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.3.2, 2.3.3, 2.4.0
> Reporter: BoMeng
> Priority: Major
> Fix For: 3.0.0, 2.4.1, 2.3.4
>
>
> In our production environment, after we upgraded Spark from version 2.1 to 2.3, the job failed with the exception below:
> ---ERROR stack trace –
> Exception occur when running Job,
> org.apache.spark.SparkException: Detected conflicting schemas when merging the schema obtained from the Hive
> Metastore with the one inferred from the file format. Metastore schema:
> {
> "type" : "struct",
> "fields" : [
> ......
> }
> Inferred schema:
> {
> "type" : "struct",
> "fields" : [
> ......
> }
> at org.apache.spark.sql.hive.HiveMetastoreCatalog$.mergeWithMetastoreSchema(HiveMetastoreCatalog.scala:295)
> at org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$11.apply(HiveMetastoreCatalog.scala:243)
> at org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$11.apply(HiveMetastoreCatalog.scala:243)
> at scala.Option.map(Option.scala:146)
> at org.apache.spark.sql.hive.HiveMetastoreCatalog.org$apache$spark$sql$hive$HiveMetastoreCatalog$$inferIfNeeded(HiveMetastoreCatalog.scala:243)
> at org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$4$$anonfun$5.apply(HiveMetastoreCatalog.scala:167)
> at org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$4$$anonfun$5.apply(HiveMetastoreCatalog.scala:156)
> at scala.Option.getOrElse(Option.scala:121)
> at org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$4.apply(HiveMetastoreCatalog.scala:156)
> at org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$4.apply(HiveMetastoreCatalog.scala:148)
> at org.apache.spark.sql.hive.HiveMetastoreCatalog.withTableCreationLock(HiveMetastoreCatalog.scala:54)
> at org.apache.spark.sql.hive.HiveMetastoreCatalog.convertToLogicalRelation(HiveMetastoreCatalog.scala:148)
> at org.apache.spark.sql.hive.RelationConversions.org$apache$spark$sql$hive$RelationConversions$$convert(HiveStrategies.scala:195)
> at org.apache.spark.sql.hive.RelationConversions$$anonfun$apply$4.applyOrElse(HiveStrategies.scala:226)
> at org.apache.spark.sql.hive.RelationConversions$$anonfun$apply$4.applyOrElse(HiveStrategies.scala:215)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)
> at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
> at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:288)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
> at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
> at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
> at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:286)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
> at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
> at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
> at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:286)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286)
> at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306)
> at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
> at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304)
> at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:286)
> at org.apache.spark.sql.hive.RelationConversions.apply(HiveStrategies.scala:215)
> at org.apache.spark.sql.hive.RelationConversions.apply(HiveStrategies.scala:180)
>
> The following test case triggers the exception, so we believe this is a bug in Spark 2.3:
> {code:java}
> // Parquet schema is subset of metaStore schema and has uppercase field name
> assertResult(
> StructType(Seq(
> StructField("UPPERCase", DoubleType, nullable = true),
> StructField("lowerCase", BinaryType, nullable = true)))) {
> HiveMetastoreCatalog.mergeWithMetastoreSchema(
> StructType(Seq(
> StructField("UPPERCase", DoubleType, nullable = true),
> StructField("lowerCase", BinaryType, nullable = true))),
> StructType(Seq(
> StructField("lowerCase", BinaryType, nullable = true))))
> }
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org