You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Alexey Kudinkin (Jira)" <ji...@apache.org> on 2022/03/18 02:05:00 UTC

[jira] [Assigned] (HUDI-3655) AvroRuntimeException from TestLayoutOptimization regarding column stats

     [ https://issues.apache.org/jira/browse/HUDI-3655?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Alexey Kudinkin reassigned HUDI-3655:
-------------------------------------

    Assignee: Alexey Kudinkin

> AvroRuntimeException from TestLayoutOptimization regarding column stats
> -----------------------------------------------------------------------
>
>                 Key: HUDI-3655
>                 URL: https://issues.apache.org/jira/browse/HUDI-3655
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: Ethan Guo
>            Assignee: Alexey Kudinkin
>            Priority: Blocker
>             Fix For: 0.11.0
>
>
> When running the TestLayoutOptimization tests,
> {code:java}
> mvn -DwildcardSuites="abc" -Dtest=TestLayoutOptimization -DfailIfNoTests=false test -pl hudi-spark-datasource/hudi-spark -am {code}
> the following exception is thrown and logged as an error, although the tests themselves do not fail.
> {code:java}
> 123926 [Executor task launch worker for task 474] ERROR org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader  - Got exception when reading log file
> org.apache.avro.AvroRuntimeException: org.apache.avro.AvroRuntimeException: Field columnName type:UNION pos:1 not set and has no default value
> 	at org.apache.hudi.avro.model.HoodieMetadataColumnStats$Builder.build(HoodieMetadataColumnStats.java:838)
> 	at org.apache.hudi.metadata.HoodieTableMetadataUtil.mergeColumnStats(HoodieTableMetadataUtil.java:906)
> 	at org.apache.hudi.metadata.HoodieMetadataPayload.combineColumnStatsMetadata(HoodieMetadataPayload.java:325)
> 	at org.apache.hudi.metadata.HoodieMetadataPayload.preCombine(HoodieMetadataPayload.java:309)
> 	at org.apache.hudi.metadata.HoodieMetadataPayload.preCombine(HoodieMetadataPayload.java:87)
> 	at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.processNextRecord(HoodieMergedLogRecordScanner.java:142)
> 	at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.processNextRecord(HoodieMetadataMergedLogRecordReader.java:78)
> 	at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:366)
> 	at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:427)
> 	at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:242)
> 	at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:181)
> 	at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:101)
> 	at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:71)
> 	at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:51)
> 	at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader$Builder.build(HoodieMetadataMergedLogRecordReader.java:246)
> 	at org.apache.hudi.metadata.HoodieBackedTableMetadata.getLogRecordScanner(HoodieBackedTableMetadata.java:379)
> 	at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:340)
> 	at org.apache.hudi.HoodieMergeOnReadRDD$$anon$1.<init>(HoodieMergeOnReadRDD.scala:122)
> 	at org.apache.hudi.HoodieMergeOnReadRDD.logFileIterator(HoodieMergeOnReadRDD.scala:113)
> 	at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:79)
> 	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> 	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> 	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> 	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> 	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> 	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> 	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> 	at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337)
> 	at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335)
> 	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
> 	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
> 	at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
> 	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
> 	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
> 	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
> 	at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
> 	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> 	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> 	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> 	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> 	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> 	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> 	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> 	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> 	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> 	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> 	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> 	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> 	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
> 	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
> 	at org.apache.spark.scheduler.Task.run(Task.scala:123)
> 	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
> 	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
> 	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> 	at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.avro.AvroRuntimeException: Field columnName type:UNION pos:1 not set and has no default value
> 	at org.apache.avro.generic.GenericData.getDefaultValue(GenericData.java:1015)
> 	at org.apache.avro.data.RecordBuilderBase.defaultValue(RecordBuilderBase.java:138)
> 	at org.apache.hudi.avro.model.HoodieMetadataColumnStats$Builder.build(HoodieMetadataColumnStats.java:828)
> 	... 56 more {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)