You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Zhen Wang (Jira)" <ji...@apache.org> on 2022/08/06 13:33:00 UTC
[jira] [Updated] (SPARK-39997) ParquetSchemaConverter fails to match schema by id
[ https://issues.apache.org/jira/browse/SPARK-39997?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zhen Wang updated SPARK-39997:
------------------------------
Description:
{code:scala}
test("SPARK-38094: absence of field ids: reading nested schema struct field renamed") {
withTempDir { dir =>
// now with nested schema/complex type
val innerTypeRenamed = new StructType().add("c1", IntegerType, true, withId(6));
val readSchema =
new StructType()
.add("c", ArrayType(innerTypeRenamed), true, withId(3))
.add("e", IntegerType, true, withId(5))
val innerType = new StructType().add("c0", IntegerType, true, withId(6))
val writeSchema =
new StructType()
.add("c", ArrayType(innerType), true, withId(3))
.add("randomName", StringType, true)
val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), "more"))
spark.createDataFrame(writeData.asJava, writeSchema)
.write.mode("overwrite").parquet(dir.getCanonicalPath)
withAllParquetReaders {
checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
// a, b, c, d all couldn't be found
Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
}
}
}
{code}
was:
```
test("SPARK-38094: absence of field ids: reading nested schema struct field renamed") {
withTempDir { dir =>
// now with nested schema/complex type
val innerTypeRenamed = new StructType().add("c1", IntegerType, true, withId(6));
val readSchema =
new StructType()
.add("c", ArrayType(innerTypeRenamed), true, withId(3))
.add("e", IntegerType, true, withId(5))
val innerType = new StructType().add("c0", IntegerType, true, withId(6))
val writeSchema =
new StructType()
.add("c", ArrayType(innerType), true, withId(3))
.add("randomName", StringType, true)
val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), "more"))
spark.createDataFrame(writeData.asJava, writeSchema)
.write.mode("overwrite").parquet(dir.getCanonicalPath)
withAllParquetReaders {
checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
// a, b, c, d all couldn't be found
Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
}
}
}
```
> ParquetSchemaConverter fails to match schema by id
> -----------------------------------------------
>
> Key: SPARK-39997
> URL: https://issues.apache.org/jira/browse/SPARK-39997
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.3.0
> Reporter: Zhen Wang
> Priority: Major
>
> {code:scala}
> test("SPARK-38094: absence of field ids: reading nested schema struct field renamed") {
> withTempDir { dir =>
> // now with nested schema/complex type
> val innerTypeRenamed = new StructType().add("c1", IntegerType, true, withId(6));
> val readSchema =
> new StructType()
> .add("c", ArrayType(innerTypeRenamed), true, withId(3))
> .add("e", IntegerType, true, withId(5))
> val innerType = new StructType().add("c0", IntegerType, true, withId(6))
> val writeSchema =
> new StructType()
> .add("c", ArrayType(innerType), true, withId(3))
> .add("randomName", StringType, true)
> val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), "more"))
> spark.createDataFrame(writeData.asJava, writeSchema)
> .write.mode("overwrite").parquet(dir.getCanonicalPath)
> withAllParquetReaders {
> checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
> // a, b, c, d all couldn't be found
> Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
> }
> }
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org