You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Alexey Kudinkin (Jira)" <ji...@apache.org> on 2022/03/28 22:29:00 UTC
[jira] [Closed] (HUDI-3655) AvroRuntimeException from TestLayoutOptimization regarding column stats
[ https://issues.apache.org/jira/browse/HUDI-3655?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Alexey Kudinkin closed HUDI-3655.
---------------------------------
Resolution: Duplicate
> AvroRuntimeException from TestLayoutOptimization regarding column stats
> -----------------------------------------------------------------------
>
> Key: HUDI-3655
> URL: https://issues.apache.org/jira/browse/HUDI-3655
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Ethan Guo
> Assignee: Alexey Kudinkin
> Priority: Blocker
> Fix For: 0.11.0
>
>
> When running TestLayoutOptimization tests,
> {code:java}
> mvn -DwildcardSuites="abc" -Dtest=TestLayoutOptimization -DfailIfNoTests=false test -pl hudi-spark-datasource/hudi-spark -am
> {code}
> the following exception is logged at ERROR level, although the tests themselves don't fail.
> {code:java}
> 123926 [Executor task launch worker for task 474] ERROR org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader - Got exception when reading log file
> org.apache.avro.AvroRuntimeException: org.apache.avro.AvroRuntimeException: Field columnName type:UNION pos:1 not set and has no default value
> at org.apache.hudi.avro.model.HoodieMetadataColumnStats$Builder.build(HoodieMetadataColumnStats.java:838)
> at org.apache.hudi.metadata.HoodieTableMetadataUtil.mergeColumnStats(HoodieTableMetadataUtil.java:906)
> at org.apache.hudi.metadata.HoodieMetadataPayload.combineColumnStatsMetadata(HoodieMetadataPayload.java:325)
> at org.apache.hudi.metadata.HoodieMetadataPayload.preCombine(HoodieMetadataPayload.java:309)
> at org.apache.hudi.metadata.HoodieMetadataPayload.preCombine(HoodieMetadataPayload.java:87)
> at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.processNextRecord(HoodieMergedLogRecordScanner.java:142)
> at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.processNextRecord(HoodieMetadataMergedLogRecordReader.java:78)
> at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:366)
> at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:427)
> at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:242)
> at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:181)
> at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:101)
> at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:71)
> at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:51)
> at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader$Builder.build(HoodieMetadataMergedLogRecordReader.java:246)
> at org.apache.hudi.metadata.HoodieBackedTableMetadata.getLogRecordScanner(HoodieBackedTableMetadata.java:379)
> at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:340)
> at org.apache.hudi.HoodieMergeOnReadRDD$$anon$1.<init>(HoodieMergeOnReadRDD.scala:122)
> at org.apache.hudi.HoodieMergeOnReadRDD.logFileIterator(HoodieMergeOnReadRDD.scala:113)
> at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:79)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337)
> at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335)
> at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
> at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
> at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
> at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
> at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
> at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
> at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
> at org.apache.spark.scheduler.Task.run(Task.scala:123)
> at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.avro.AvroRuntimeException: Field columnName type:UNION pos:1 not set and has no default value
> at org.apache.avro.generic.GenericData.getDefaultValue(GenericData.java:1015)
> at org.apache.avro.data.RecordBuilderBase.defaultValue(RecordBuilderBase.java:138)
> at org.apache.hudi.avro.model.HoodieMetadataColumnStats$Builder.build(HoodieMetadataColumnStats.java:828)
> ... 56 more
> {code}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)