You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Raymond Xu (Jira)" <ji...@apache.org> on 2023/01/18 21:39:00 UTC

[jira] [Closed] (HUDI-5160) Spark df saveAsTable failed with CTAS

     [ https://issues.apache.org/jira/browse/HUDI-5160?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Raymond Xu closed HUDI-5160.
----------------------------
    Resolution: Fixed

> Spark df saveAsTable failed with CTAS
> -------------------------------------
>
>                 Key: HUDI-5160
>                 URL: https://issues.apache.org/jira/browse/HUDI-5160
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: spark-sql
>            Reporter: 董可伦
>            Assignee: Raymond Xu
>            Priority: Blocker
>              Labels: pull-request-available
>             Fix For: 0.13.0
>
>
> In version 0.9.0 this worked, but it now fails.
> {code:java}
> import spark.implicits._
> val partitionValue = "2022-11-05"
> val df = Seq((1, "a1", 10, 1000, partitionValue)).toDF("id", "name", "value", "ts", "dt")
> val tableName = "test_hudi_table"
> // Write a table using the Spark DataFrame API.
> df.write.format("hudi")
> .option(HoodieWriteConfig.TBL_NAME.key, tableName)
> .option(TABLE_TYPE.key, MOR_TABLE_TYPE_OPT_VAL)
> // .option(HoodieTableConfig.TYPE.key(), MOR_TABLE_TYPE_OPT_VAL)
> .option(RECORDKEY_FIELD.key, "id")
> .option(PRECOMBINE_FIELD.key, "ts")
> .option(PARTITIONPATH_FIELD.key, "dt")
> .option(KEYGENERATOR_CLASS_NAME.key, classOf[SimpleKeyGenerator].getName)
> .option(HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key, "1")
> .option(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "1")
> .partitionBy("dt")
> .mode(SaveMode.Overwrite)
> .saveAsTable(tableName){code}
>  
> {code:java}
> Can't find primaryKey `uuid` in root
>  |-- _hoodie_commit_time: string (nullable = true)
>  |-- _hoodie_commit_seqno: string (nullable = true)
>  |-- _hoodie_record_key: string (nullable = true)
>  |-- _hoodie_partition_path: string (nullable = true)
>  |-- _hoodie_file_name: string (nullable = true)
>  |-- id: integer (nullable = false)
>  |-- name: string (nullable = true)
>  |-- value: integer (nullable = false)
>  |-- ts: integer (nullable = false)
>  |-- dt: string (nullable = true)
> .
> java.lang.IllegalArgumentException: Can't find primaryKey `uuid` in root
>  |-- _hoodie_commit_time: string (nullable = true)
>  |-- _hoodie_commit_seqno: string (nullable = true)
>  |-- _hoodie_record_key: string (nullable = true)
>  |-- _hoodie_partition_path: string (nullable = true)
>  |-- _hoodie_file_name: string (nullable = true)
>  |-- id: integer (nullable = false)
>  |-- name: string (nullable = true)
>  |-- value: integer (nullable = false)
>  |-- ts: integer (nullable = false)
>  |-- dt: string (nullable = true)
> .
>     at org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:40)
>     at org.apache.spark.sql.hudi.HoodieOptionConfig$$anonfun$validateTable$1.apply(HoodieOptionConfig.scala:201)
>     at org.apache.spark.sql.hudi.HoodieOptionConfig$$anonfun$validateTable$1.apply(HoodieOptionConfig.scala:200)
>     at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
>     at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
>     at org.apache.spark.sql.hudi.HoodieOptionConfig$.validateTable(HoodieOptionConfig.scala:200)
>     at org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.parseSchemaAndConfigs(HoodieCatalogTable.scala:256)
>     at org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.initHoodieTable(HoodieCatalogTable.scala:171)
>     at org.apache.spark.sql.hudi.command.CreateHoodieTableAsSelectCommand.run(CreateHoodieTableAsSelectCommand.scala:99){code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)