You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "KnightChess (Jira)" <ji...@apache.org> on 2022/05/17 14:02:00 UTC
[jira] [Created] (HUDI-4113) Cannot parse <null> schema when use spark delete sql
KnightChess created HUDI-4113:
---------------------------------
Summary: Cannot parse <null> schema when use spark delete sql
Key: HUDI-4113
URL: https://issues.apache.org/jira/browse/HUDI-4113
Project: Apache Hudi
Issue Type: Bug
Components: spark-sql
Reporter: KnightChess
Assignee: KnightChess
Running a Spark SQL `DELETE` on a table after `ALTER TABLE ... DROP PARTITION` has been executed on it fails with `Cannot parse <null> schema`.
#### Steps to reproduce:
```sql
-- Repro for HUDI-4113: a DELETE issued after ALTER TABLE ... DROP PARTITION
-- fails with `Cannot parse <null> schema`.

-- Copy-on-write Hudi table partitioned by dt, with the metadata table and
-- hive/meta sync enabled.
CREATE TABLE delete_error_test (
    id int,
    age int,
    name string,
    dt string
)
USING hudi
OPTIONS (
    type = 'cow',
    primaryKey = 'id',
    hoodie.table.precombine.field = 'age',
    hoodie.datasource.query.type = 'snapshot',
    hoodie.metadata.enable = 'true',
    hoodie.metadata.compact.max.delta.commits = '1',
    hoodie.metadata.keep.min.commits = '2',
    hoodie.metadata.keep.max.commits = '3',
    hoodie.cleaner.policy = 'KEEP_LATEST_COMMITS',
    hoodie.cleaner.commits.retained = '3',
    hoodie.datasource.meta.sync.enable = 'true'
)
PARTITIONED BY (dt);

-- Write one row into each of two partitions (dt = '20220101' and '20220102').
INSERT OVERWRITE delete_error_test SELECT 1, 18, 'wlq', '20220101';
INSERT OVERWRITE delete_error_test SELECT 2, 19, 'wlq', '20220102';

-- Drop the first partition.
ALTER TABLE delete_error_test DROP PARTITION (dt = '20220101');

-- After the other partition has been dropped, this DELETE triggers the error.
DELETE FROM delete_error_test WHERE dt = '20220102';
```
#### stack:
```shell
Caused by: org.apache.hudi.exception.HoodieUpsertException: Error upserting bucketType UPDATE for partition :0
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:329)
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.lambda$mapPartitionsAsRDD$a3ab3c4$1(BaseSparkCommitActionExecutor.java:244)
at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1(JavaRDDLike.scala:102)
at org.apache.spark.api.java.JavaRDDLike.$anonfun$mapPartitionsWithIndex$1$adapted(JavaRDDLike.scala:102)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.$anonfun$getOrCompute$1(RDD.scala:386)
at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1498)
at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$doPut(BlockManager.scala:1408)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1472)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1295)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:335)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.avro.SchemaParseException: Cannot parse <null> schema
at org.apache.avro.Schema.parse(Schema.java:1633)
at org.apache.avro.Schema$Parser.parse(Schema.java:1430)
at org.apache.avro.Schema$Parser.parse(Schema.java:1418)
at org.apache.hudi.io.HoodieWriteHandle.getSpecifiedTableSchema(HoodieWriteHandle.java:137)
at org.apache.hudi.io.HoodieWriteHandle.lambda$new$0(HoodieWriteHandle.java:119)
at org.apache.hudi.common.util.Option.orElseGet(Option.java:142)
at org.apache.hudi.io.HoodieWriteHandle.<init>(HoodieWriteHandle.java:119)
at org.apache.hudi.io.HoodieWriteHandle.<init>(HoodieWriteHandle.java:109)
at org.apache.hudi.io.HoodieMergeHandle.<init>(HoodieMergeHandle.java:123)
at org.apache.hudi.io.HoodieMergeHandle.<init>(HoodieMergeHandle.java:116)
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.getUpdateHandle(BaseSparkCommitActionExecutor.java:377)
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpdate(BaseSparkCommitActionExecutor.java:348)
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:322)
... 28 more
```
--
This message was sent by Atlassian Jira
(v8.20.7#820007)