You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "ASF GitHub Bot (Jira)" <ji...@apache.org> on 2022/04/01 15:12:00 UTC

[jira] [Updated] (HUDI-3708) Upsert to metadata table fails due to schema change

     [ https://issues.apache.org/jira/browse/HUDI-3708?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

ASF GitHub Bot updated HUDI-3708:
---------------------------------
    Labels: pull-request-available  (was: )

> Upsert to metadata table fails due to schema change
> ---------------------------------------------------
>
>                 Key: HUDI-3708
>                 URL: https://issues.apache.org/jira/browse/HUDI-3708
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: Ethan Guo
>            Priority: Blocker
>              Labels: pull-request-available
>             Fix For: 0.11.0
>
>
> Scenario: Deltastreamer in continuous mode, COW table, single writer with async clustering and cleaning. Only the files partition is enabled in the metadata table. The table was written before the metadata schema change (adding the "columnName" field to the HoodieMetadataColumnStats record). When the new writer with the new schema is used, the upsert to the metadata table fails the schema compatibility check, because the writer schema contains "columnName" while the existing table schema does not.
> {code:java}
> 22/03/23 23:11:38 WARN CleanActionExecutor: Failed to perform previous clean operation, instant: [==>20220314172020474__clean__INFLIGHT]
> org.apache.hudi.exception.HoodieUpsertException: Failed upsert schema compatibility check.
>     at org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:729)
>     at org.apache.hudi.client.SparkRDDWriteClient.upsertPreppedRecords(SparkRDDWriteClient.java:169)
>     at org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commit(SparkHoodieBackedTableMetadataWriter.java:154)
>     at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:670)
>     at org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:694)
>     at org.apache.hudi.table.action.BaseActionExecutor.lambda$writeTableMetadata$1(BaseActionExecutor.java:69)
>     at org.apache.hudi.common.util.Option.ifPresent(Option.java:97)
>     at org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:69)
>     at org.apache.hudi.table.action.clean.CleanActionExecutor.runClean(CleanActionExecutor.java:211)
>     at org.apache.hudi.table.action.clean.CleanActionExecutor.runPendingClean(CleanActionExecutor.java:176)
>     at org.apache.hudi.table.action.clean.CleanActionExecutor.lambda$execute$6(CleanActionExecutor.java:238)
>     at java.util.ArrayList.forEach(ArrayList.java:1259)
>     at org.apache.hudi.table.action.clean.CleanActionExecutor.execute(CleanActionExecutor.java:232)
>     at org.apache.hudi.table.HoodieSparkCopyOnWriteTable.clean(HoodieSparkCopyOnWriteTable.java:339)
>     at org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:781)
>     at org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:738)
>     at org.apache.hudi.async.AsyncCleanerService.lambda$startService$0(AsyncCleanerService.java:55)
>     at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.hudi.exception.HoodieException: Failed schema compatibility check for writerSchema :{"type":"record","name":"HoodieMetadataRecord","namespace":"org.apache.hudi.avro.model","doc":"A record saved within the Metadata Table","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"key","type":{"type":"string","avro.java.string":"String"}},{"name":"type","type":"int","doc":"Type of the metadata record"},{"name":"filesystemMetadata","type":["null",{"type":"map","values":{"type":"record","name":"HoodieMetadataFileInfo","fields":[{"name":"size","type":"long","doc":"Size of the file"},{"name":"isDeleted","type":"boolean","doc":"True if this file has been deleted"}]},"avro.java.string":"String"}],"doc":"Contains information about partitions and files within the dataset"},{"name":"BloomFilterMetadata","type":["null",{"type":"record","name":"HoodieMetadataBloomFilter","doc":"Data file bloom filter details","fields":[{"name":"type","type":{"type":"string","avro.java.string":"String"},"doc":"Bloom filter type code"},{"name":"timestamp","type":{"type":"string","avro.java.string":"String"},"doc":"Instant timestamp when this metadata was created/updated"},{"name":"bloomFilter","type":"bytes","doc":"Bloom filter binary byte array"},{"name":"isDeleted","type":"boolean","doc":"Bloom filter entry valid/deleted flag"}]}],"doc":"Metadata Index of bloom filters for all data files in the user table","default":null},{"name":"ColumnStatsMetadata","type":["null",{"type":"record","name":"HoodieMetadataColumnStats","doc":"Data file column 
statistics","fields":[{"name":"fileName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"File name for which this column statistics applies"},{"name":"columnName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Column name for which this column statistics applies"},{"name":"minValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Minimum value in the range. Based on user data table schema, we can convert this to appropriate type"},{"name":"maxValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Maximum value in the range. Based on user data table schema, we can convert it to appropriate type"},{"name":"valueCount","type":["null","long"],"doc":"Total count of values"},{"name":"nullCount","type":["null","long"],"doc":"Total count of null values"},{"name":"totalSize","type":["null","long"],"doc":"Total storage size on disk"},{"name":"totalUncompressedSize","type":["null","long"],"doc":"Total uncompressed storage size on disk"},{"name":"isDeleted","type":"boolean","doc":"Column range entry valid/deleted flag"}]}],"doc":"Metadata Index of column statistics for all data files in the user table","default":null}]}, table schema :{"type":"record","name":"HoodieMetadataRecord","namespace":"org.apache.hudi.avro.model","doc":"A record saved within the Metadata Table","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"key","type":{"type":"string","avro.java.string":"String"}},{"name":"type","type":"int","doc":"Type of the metadata 
record"},{"name":"filesystemMetadata","type":["null",{"type":"map","values":{"type":"record","name":"HoodieMetadataFileInfo","fields":[{"name":"size","type":"long","doc":"Size of the file"},{"name":"isDeleted","type":"boolean","doc":"True if this file has been deleted"}]},"avro.java.string":"String"}],"doc":"Contains information about partitions and files within the dataset"},{"name":"BloomFilterMetadata","type":["null",{"type":"record","name":"HoodieMetadataBloomFilter","doc":"Data file bloom filter details","fields":[{"name":"type","type":{"type":"string","avro.java.string":"String"},"doc":"Bloom filter type code"},{"name":"timestamp","type":{"type":"string","avro.java.string":"String"},"doc":"Instant timestamp when this metadata was created/updated"},{"name":"bloomFilter","type":"bytes","doc":"Bloom filter binary byte array"},{"name":"isDeleted","type":"boolean","doc":"Bloom filter entry valid/deleted flag"}]}],"doc":"Metadata Index of bloom filters for all data files in the user table","default":null},{"name":"ColumnStatsMetadata","type":["null",{"type":"record","name":"HoodieMetadataColumnStats","doc":"Data file column statistics","fields":[{"name":"fileName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"File name for which this column statistics applies"},{"name":"minValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Minimum value in the range. Based on user data table schema, we can convert this to appropriate type"},{"name":"maxValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Maximum value in the range. 
Based on user data table schema, we can convert it to appropriate type"},{"name":"valueCount","type":["null","long"],"doc":"Total count of values"},{"name":"nullCount","type":["null","long"],"doc":"Total count of null values"},{"name":"totalSize","type":["null","long"],"doc":"Total storage size on disk"},{"name":"totalUncompressedSize","type":["null","long"],"doc":"Total uncompressed storage size on disk"},{"name":"isDeleted","type":"boolean","doc":"Column range entry valid/deleted flag"}]}],"doc":"Metadata Index of column statistics for all data files in the user table","default":null}]}, base path :file:/Users/ethan/Work/scripts/mt_rollout_testing/deploy_b_single_writer_async_services/b3_ds_cow_010mt_011mt_conf_fix2/test_table/.hoodie/metadata
>     at org.apache.hudi.table.HoodieTable.validateSchema(HoodieTable.java:721)
>     at org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:727)
>     ... 20 more {code}
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)