You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "xi chaomin (Jira)" <ji...@apache.org> on 2022/07/30 06:57:00 UTC
[jira] [Updated] (HUDI-4509) FileNotFoundException during doing readStream
[ https://issues.apache.org/jira/browse/HUDI-4509?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
xi chaomin updated HUDI-4509:
-----------------------------
Description:
When I read a Hudi table with Spark readStream, a FileNotFoundException occurs after a while.
{code:java}
val df: DataFrame = spark.readStream.format("org.apache.hudi")
.load("hdfs://10.19.29.148:8020/tmp/hudi/ss_bucket")
df.writeStream
.format("console")
.trigger(Trigger.ProcessingTime("5 seconds"))
.option("checkpointLocation", "/tmp/hoodie/checkpoint_xicm")
.start()
.awaitTermination()
{code}
Caused by: org.apache.hudi.exception.HoodieException: Exception when reading log file
at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:352) ~[classes/:?]
at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:192) ~[classes/:?]
at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110) ~[classes/:?]
at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103) ~[classes/:?]
at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:324) ~[classes/:?]
at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:402) ~[classes/:?]
at org.apache.hudi.HoodieMergeOnReadRDD$LogFileIterator.<init>(HoodieMergeOnReadRDD.scala:196) ~[classes/:?]
at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:124) ~[classes/:?]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.scheduler.Task.run(Task.scala:123) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_211]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_211]
at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_211]
Caused by: org.apache.hudi.exception.HoodieIOException: IOException when reading logblock from log file HoodieLogFile{pathStr='hdfs://10.19.29.148:8020/tmp/hudi/ss_bucket/.00000000-cfd8-4973-badf-2cf7d5853cab-0_20220729172717399.log.1_0-1033-1923', fileLen=-1}
at org.apache.hudi.common.table.log.HoodieLogFileReader.next(HoodieLogFileReader.java:389) ~[classes/:?]
at org.apache.hudi.common.table.log.HoodieLogFormatReader.next(HoodieLogFormatReader.java:123) ~[classes/:?]
at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:229) ~[classes/:?]
at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:192) ~[classes/:?]
at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110) ~[classes/:?]
at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103) ~[classes/:?]
at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:324) ~[classes/:?]
at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:402) ~[classes/:?]
at org.apache.hudi.HoodieMergeOnReadRDD$LogFileIterator.<init>(HoodieMergeOnReadRDD.scala:196) ~[classes/:?]
at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:124) ~[classes/:?]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.scheduler.Task.run(Task.scala:123) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) ~[spark-core_2.11-2.4.4.jar:2.4.4]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_211]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_211]
at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_211]
Caused by: java.io.FileNotFoundException: File does not exist: /tmp/hudi/ss_bucket/.00000000-cfd8-4973-badf-2cf7d5853cab-0_20220729172717399.log.1_0-1033-1923
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:86)
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
was:
When I read a Hudi table with Spark readStream, a FileNotFoundException occurs after a while.
{code:java}
val df: DataFrame = spark.readStream.format("org.apache.hudi")
.load("hdfs://10.19.29.148:8020/tmp/hudi/ss_bucket")
df.writeStream
.format("console")
.trigger(Trigger.ProcessingTime("5 seconds"))
.option("checkpointLocation", "/tmp/hoodie/checkpoint_xicm")
.start()
.awaitTermination()
{code}
> FileNotFoundException during doing readStream
> ---------------------------------------------
>
> Key: HUDI-4509
> URL: https://issues.apache.org/jira/browse/HUDI-4509
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: xi chaomin
> Priority: Major
>
> When I read a Hudi table with Spark readStream, a FileNotFoundException occurs after a while.
> {code:java}
> val df: DataFrame = spark.readStream.format("org.apache.hudi")
> .load("hdfs://10.19.29.148:8020/tmp/hudi/ss_bucket")
> df.writeStream
> .format("console")
> .trigger(Trigger.ProcessingTime("5 seconds"))
> .option("checkpointLocation", "/tmp/hoodie/checkpoint_xicm")
> .start()
> .awaitTermination()
> {code}
> Caused by: org.apache.hudi.exception.HoodieException: Exception when reading log file
> at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:352) ~[classes/:?]
> at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:192) ~[classes/:?]
> at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110) ~[classes/:?]
> at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103) ~[classes/:?]
> at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:324) ~[classes/:?]
> at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:402) ~[classes/:?]
> at org.apache.hudi.HoodieMergeOnReadRDD$LogFileIterator.<init>(HoodieMergeOnReadRDD.scala:196) ~[classes/:?]
> at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:124) ~[classes/:?]
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.scheduler.Task.run(Task.scala:123) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_211]
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_211]
> at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_211]
> Caused by: org.apache.hudi.exception.HoodieIOException: IOException when reading logblock from log file HoodieLogFile{pathStr='hdfs://10.19.29.148:8020/tmp/hudi/ss_bucket/.00000000-cfd8-4973-badf-2cf7d5853cab-0_20220729172717399.log.1_0-1033-1923', fileLen=-1}
> at org.apache.hudi.common.table.log.HoodieLogFileReader.next(HoodieLogFileReader.java:389) ~[classes/:?]
> at org.apache.hudi.common.table.log.HoodieLogFormatReader.next(HoodieLogFormatReader.java:123) ~[classes/:?]
> at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:229) ~[classes/:?]
> at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:192) ~[classes/:?]
> at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:110) ~[classes/:?]
> at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:103) ~[classes/:?]
> at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner$Builder.build(HoodieMergedLogRecordScanner.java:324) ~[classes/:?]
> at org.apache.hudi.HoodieMergeOnReadRDD$.scanLog(HoodieMergeOnReadRDD.scala:402) ~[classes/:?]
> at org.apache.hudi.HoodieMergeOnReadRDD$LogFileIterator.<init>(HoodieMergeOnReadRDD.scala:196) ~[classes/:?]
> at org.apache.hudi.HoodieMergeOnReadRDD.compute(HoodieMergeOnReadRDD.scala:124) ~[classes/:?]
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.rdd.RDD.iterator(RDD.scala:288) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.scheduler.Task.run(Task.scala:123) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) ~[spark-core_2.11-2.4.4.jar:2.4.4]
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_211]
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_211]
> at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_211]
> Caused by: java.io.FileNotFoundException: File does not exist: /tmp/hudi/ss_bucket/.00000000-cfd8-4973-badf-2cf7d5853cab-0_20220729172717399.log.1_0-1033-1923
> at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:86)
> at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
--
This message was sent by Atlassian Jira
(v8.20.10#820010)