You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Zhen Wang (Jira)" <ji...@apache.org> on 2022/08/06 13:33:00 UTC
[jira] [Updated] (SPARK-39997) ParquetSchemaConverter fails to match schema by id
[ https://issues.apache.org/jira/browse/SPARK-39997?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zhen Wang updated SPARK-39997:
------------------------------
Description:
{code:scala}
test("SPARK-38094: absence of field ids: reading nested schema struct field renamed") {
withTempDir { dir =>
// now with nested schema/complex type
val innerTypeRenamed = new StructType().add("c1", IntegerType, true, withId(6));
val readSchema =
new StructType()
.add("c", ArrayType(innerTypeRenamed), true, withId(3))
.add("e", IntegerType, true, withId(5))
val innerType = new StructType().add("c0", IntegerType, true, withId(6))
val writeSchema =
new StructType()
.add("c", ArrayType(innerType), true, withId(3))
.add("randomName", StringType, true)
val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), "more"))
spark.createDataFrame(writeData.asJava, writeSchema)
.write.mode("overwrite").parquet(dir.getCanonicalPath)
withAllParquetReaders {
checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
// a, b, c, d all couldn't be found
Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
}
}
}
{code}
was:
```
test("SPARK-38094: absence of field ids: reading nested schema struct field renamed") {
withTempDir { dir =>
// now with nested schema/complex type
val innerTypeRenamed = new StructType().add("c1", IntegerType, true, withId(6));
val readSchema =
new StructType()
.add("c", ArrayType(innerTypeRenamed), true, withId(3))
.add("e", IntegerType, true, withId(5))
val innerType = new StructType().add("c0", IntegerType, true, withId(6))
val writeSchema =
new StructType()
.add("c", ArrayType(innerType), true, withId(3))
.add("randomName", StringType, true)
val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), "more"))
spark.createDataFrame(writeData.asJava, writeSchema)
.write.mode("overwrite").parquet(dir.getCanonicalPath)
withAllParquetReaders {
checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
// a, b, c, d all couldn't be found
Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
}
}
}
```
> ParquetSchemaConverter fails to match schema by id
> -----------------------------------------------
>
> Key: SPARK-39997
> URL: https://issues.apache.org/jira/browse/SPARK-39997
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.3.0
> Reporter: Zhen Wang
> Priority: Major
>
> {code:scala}
> test("SPARK-38094: absence of field ids: reading nested schema struct field renamed") {
> withTempDir { dir =>
> // now with nested schema/complex type
> val innerTypeRenamed = new StructType().add("c1", IntegerType, true, withId(6));
> val readSchema =
> new StructType()
> .add("c", ArrayType(innerTypeRenamed), true, withId(3))
> .add("e", IntegerType, true, withId(5))
> val innerType = new StructType().add("c0", IntegerType, true, withId(6))
> val writeSchema =
> new StructType()
> .add("c", ArrayType(innerType), true, withId(3))
> .add("randomName", StringType, true)
> val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), "more"))
> spark.createDataFrame(writeData.asJava, writeSchema)
> .write.mode("overwrite").parquet(dir.getCanonicalPath)
> withAllParquetReaders {
> checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
> // a, b, c, d all couldn't be found
> Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
> }
> }
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org