You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Ethan Guo (Jira)" <ji...@apache.org> on 2022/03/18 01:59:00 UTC

[jira] [Created] (HUDI-3655) AvroRuntimeException from TestLayoutOptimization regarding column stats

Ethan Guo created HUDI-3655:
-------------------------------

             Summary: AvroRuntimeException from TestLayoutOptimization regarding column stats
                 Key: HUDI-3655
                 URL: https://issues.apache.org/jira/browse/HUDI-3655
             Project: Apache Hudi
          Issue Type: Bug
            Reporter: Ethan Guo
             Fix For: 0.11.0


When running TestLayoutOptimization tests,
{code:java}
mvn -DwildcardSuites="abc" -Dtest=TestLayoutOptimization -DfailIfNoTests=false test -pl hudi-spark-datasource/hudi-spark -am {code}
the following exception is thrown, although the tests don't fail.
{code:java}
123926 [Executor task launch worker for task 474] ERROR org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader  - Got exception when reading log file
org.apache.avro.AvroRuntimeException: org.apache.avro.AvroRuntimeException: Field columnName type:UNION pos:1 not set and has no default value
	at org.apache.hudi.avro.model.HoodieMetadataColumnStats$Builder.build(HoodieMetadataColumnStats.java:838)
	at org.apache.hudi.metadata.HoodieTableMetadataUtil.mergeColumnStats(HoodieTableMetadataUtil.java:906)
	at org.apache.hudi.metadata.HoodieMetadataPayload.combineColumnStatsMetadata(HoodieMetadataPayload.java:325)
	at org.apache.hudi.metadata.HoodieMetadataPayload.preCombine(HoodieMetadataPayload.java:309)
	at org.apache.hudi.metadata.HoodieMetadataPayload.preCombine(HoodieMetadataPayload.java:87)
	at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.processNextRecord(HoodieMergedLogRecordScanner.java:142)
	at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.processNextRecord(HoodieMetadataMergedLogRecordReader.java:78)
	at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:366)
	at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:427)
	at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:242)
	at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:181)
	at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:101)
	at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:71)
	at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:51)
	at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader$Builder.build(HoodieMetadataMergedLogRecordReader.java:246)
	at org.apache.hudi.metadata.HoodieBackedTableMetadata.getLogRecordScanner(HoodieBackedTableMetadata.java:379)
	at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:340)
	at org.apache.hudi.HoodieMergeOnReadRDD$$anon$1.<init>(HoodieMergeOnReadRDD.scala:122)
	at org.apache.hudi.HoodieMergeOnReadRDD.logFileIterator(HoodieMergeOnReadRDD.scala:113)
	at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:79)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337)
	at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335)
	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
	at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
	at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.avro.AvroRuntimeException: Field columnName type:UNION pos:1 not set and has no default value
	at org.apache.avro.generic.GenericData.getDefaultValue(GenericData.java:1015)
	at org.apache.avro.data.RecordBuilderBase.defaultValue(RecordBuilderBase.java:138)
	at org.apache.hudi.avro.model.HoodieMetadataColumnStats$Builder.build(HoodieMetadataColumnStats.java:828)
	... 56 more {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)