You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "sivabalan narayanan (Jira)" <ji...@apache.org> on 2023/03/30 02:05:00 UTC
[jira] [Updated] (HUDI-4113) Cannot parse <null> schema when use spark delete sql
[ https://issues.apache.org/jira/browse/HUDI-4113?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
sivabalan narayanan updated HUDI-4113:
--------------------------------------
Fix Version/s: 0.12.2
> Cannot parse <null> schema when use spark delete sql
> ----------------------------------------------------
>
> Key: HUDI-4113
> URL: https://issues.apache.org/jira/browse/HUDI-4113
> Project: Apache Hudi
> Issue Type: Bug
> Components: spark-sql
> Reporter: KnightChess
> Assignee: KnightChess
> Priority: Minor
> Labels: pull-request-available
> Fix For: 0.12.2, 0.13.0
>
>
> Running a Spark SQL `delete` after `alter table ... drop partition` fails with `Cannot parse <null> schema`.
> h3. Steps to reproduce:
> {code:java}
> CREATE TABLE delete_error_test(
> id int,
> age int,
> name string,
> dt string)
> using hudi
> options (
> type = 'cow',
> primaryKey = 'id',
> hoodie.table.precombine.field = 'age',
> hoodie.datasource.query.type = 'snapshot',
> hoodie.metadata.enable = 'true',
> hoodie.metadata.compact.max.delta.commits = '1',
> hoodie.metadata.keep.min.commits = '2',
> hoodie.metadata.keep.max.commits = '3',
> hoodie.cleaner.policy = 'KEEP_LATEST_COMMITS',
> hoodie.cleaner.commits.retained = '3',
> hoodie.datasource.meta.sync.enable = 'true'
> )
> partitioned by (dt);
>
>
> insert overwrite delete_error_test select 1, 18, 'wlq', '20220101';
> insert overwrite delete_error_test select 2, 19, 'wlq', '20220102';
> alter table delete_error_test drop partition (dt = '20220101');
>
> # after dropping the other partition, this sql will trigger the error
> delete from delete_error_test where dt = '20220102'; {code}
> h3. stack:
> {code:java}
> Caused by: org.apache.hudi.exception.HoodieUpsertException: Error upserting bucketType UPDATE for partition :0
> at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:329)
> at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.lambda$mapPartitionsAsRDD$a3ab3c4$1(BaseSparkCommitActionExecutor.java:244)
> at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1(JavaRDDLike.scala:102)
> at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1$adapted(JavaRDDLike.scala:102)
> at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915)
> at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
> at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:386)
> at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1498)
> at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$doPut(BlockManager.scala:1408)
> at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1472)
> at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1295)
> at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:335)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> at org.apache.spark.scheduler.Task.run(Task.scala:131)
> at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.avro.SchemaParseException: Cannot parse <null> schema
> at org.apache.avro.Schema.parse(Schema.java:1633)
> at org.apache.avro.Schema$Parser.parse(Schema.java:1430)
> at org.apache.avro.Schema$Parser.parse(Schema.java:1418)
> at org.apache.hudi.io.HoodieWriteHandle.getSpecifiedTableSchema(HoodieWriteHandle.java:137)
> at org.apache.hudi.io.HoodieWriteHandle.lambda$new$0(HoodieWriteHandle.java:119)
> at org.apache.hudi.common.util.Option.orElseGet(Option.java:142)
> at org.apache.hudi.io.HoodieWriteHandle.<init>(HoodieWriteHandle.java:119)
> at org.apache.hudi.io.HoodieWriteHandle.<init>(HoodieWriteHandle.java:109)
> at org.apache.hudi.io.HoodieMergeHandle.<init>(HoodieMergeHandle.java:123)
> at org.apache.hudi.io.HoodieMergeHandle.<init>(HoodieMergeHandle.java:116)
> at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.getUpdateHandle(BaseSparkCommitActionExecutor.java:377)
> at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdate(BaseSparkCommitActionExecutor.java:348)
> at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:322)
> ... 28 more {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)