You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Raymond Xu (Jira)" <ji...@apache.org> on 2023/01/10 02:44:00 UTC
[jira] [Updated] (HUDI-5160) Spark df saveAsTable failed with CTAS
[ https://issues.apache.org/jira/browse/HUDI-5160?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Raymond Xu updated HUDI-5160:
-----------------------------
Sprint: 0.13.0 Final Sprint, 0.13.0 Final Sprint 2 (was: 0.13.0 Final Sprint)
> Spark df saveAsTable failed with CTAS
> -------------------------------------
>
> Key: HUDI-5160
> URL: https://issues.apache.org/jira/browse/HUDI-5160
> Project: Apache Hudi
> Issue Type: Bug
> Components: spark-sql
> Reporter: 董可伦
> Assignee: Raymond Xu
> Priority: Blocker
> Labels: pull-request-available
> Fix For: 0.13.0
>
>
> In version 0.9.0 this worked, but it now fails.
> {code:java}
> import spark.implicits._
> val partitionValue = "2022-11-05"
> val df = Seq((1, "a1", 10, 1000, partitionValue)).toDF("id", "name", "value", "ts", "dt")
> val tableName = "test_hudi_table"
> // Write a table by spark dataframe.
> df.write.format("hudi")
> .option(HoodieWriteConfig.TBL_NAME.key, tableName)
> .option(TABLE_TYPE.key, MOR_TABLE_TYPE_OPT_VAL)
> // .option(HoodieTableConfig.TYPE.key(), MOR_TABLE_TYPE_OPT_VAL)
> .option(RECORDKEY_FIELD.key, "id")
> .option(PRECOMBINE_FIELD.key, "ts")
> .option(PARTITIONPATH_FIELD.key, "dt")
> .option(KEYGENERATOR_CLASS_NAME.key, classOf[SimpleKeyGenerator].getName)
> .option(HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key, "1")
> .option(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "1")
> .partitionBy("dt")
> .mode(SaveMode.Overwrite)
> .saveAsTable(tableName){code}
>
> {code:java}
> Can't find primaryKey `uuid` in root
> |-- _hoodie_commit_time: string (nullable = true)
> |-- _hoodie_commit_seqno: string (nullable = true)
> |-- _hoodie_record_key: string (nullable = true)
> |-- _hoodie_partition_path: string (nullable = true)
> |-- _hoodie_file_name: string (nullable = true)
> |-- id: integer (nullable = false)
> |-- name: string (nullable = true)
> |-- value: integer (nullable = false)
> |-- ts: integer (nullable = false)
> |-- dt: string (nullable = true)
> .
> java.lang.IllegalArgumentException: Can't find primaryKey `uuid` in root
> |-- _hoodie_commit_time: string (nullable = true)
> |-- _hoodie_commit_seqno: string (nullable = true)
> |-- _hoodie_record_key: string (nullable = true)
> |-- _hoodie_partition_path: string (nullable = true)
> |-- _hoodie_file_name: string (nullable = true)
> |-- id: integer (nullable = false)
> |-- name: string (nullable = true)
> |-- value: integer (nullable = false)
> |-- ts: integer (nullable = false)
> |-- dt: string (nullable = true)
> .
> at org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:40)
> at org.apache.spark.sql.hudi.HoodieOptionConfig$$anonfun$validateTable$1.apply(HoodieOptionConfig.scala:201)
> at org.apache.spark.sql.hudi.HoodieOptionConfig$$anonfun$validateTable$1.apply(HoodieOptionConfig.scala:200)
> at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
> at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
> at org.apache.spark.sql.hudi.HoodieOptionConfig$.validateTable(HoodieOptionConfig.scala:200)
> at org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.parseSchemaAndConfigs(HoodieCatalogTable.scala:256)
> at org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.initHoodieTable(HoodieCatalogTable.scala:171)
> at org.apache.spark.sql.hudi.command.CreateHoodieTableAsSelectCommand.run(CreateHoodieTableAsSelectCommand.scala:99){code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)