[GitHub] [hudi] umehrot2 commented on issue #6900: [SUPPORT] Hudi Failed to read MARKERS file
umehrot2 commented on issue #6900:
URL: https://github.com/apache/hudi/issues/6900#issuecomment-1286257672
Latest stack trace, seen when running structured streaming with the following properties:
```
hoodie.datasource.hive_sync.enable=false
hoodie.upsert.shuffle.parallelism=20
hoodie.insert.shuffle.parallelism=20
hoodie.keep.min.commits=6
hoodie.keep.max.commits=7
hoodie.parquet.small.file.limit=52428800
hoodie.index.type=GLOBAL_BLOOM
hoodie.datasource.write.payload.class=org.apache.hudi.common.model.DefaultHoodieRecordPayload
hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator
hoodie.metadata.enable=true
hoodie.cleaner.commits.retained=3
hoodie.clean.max.commits=5
hoodie.clean.async=false
hoodie.clean.automatic=true
hoodie.archive.async=false
hoodie.datasource.compaction.async.enable=true
hoodie.write.markers.type=DIRECT
hoodie.embed.timeline.server=true
hoodie.embed.timeline.server.async=false
hoodie.compact.schedule.inline=false
hoodie.compact.inline.max.delta.commits=2
```
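For context, a minimal sketch of how these options would be wired into the structured streaming write. The rate source, table name, record key, precombine field, and all paths below are placeholders, not taken from this job; the Hudi options are passed through verbatim from the list above:
```
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.Trigger

val spark = SparkSession.builder().appName("hudi-stream-sketch").getOrCreate()

// Stand-in source; the real job reads CDC data from its own source.
val input = spark.readStream.format("rate").load()

input.writeStream
  .format("hudi")
  .outputMode("append")
  // Placeholders: the issue does not show the table name or key/precombine fields.
  .option("hoodie.table.name", "dhdata_15")
  .option("hoodie.datasource.write.recordkey.field", "id")
  .option("hoodie.datasource.write.precombine.field", "ts")
  // MOR is implied by HoodieSparkMergeOnReadTable in the trace below.
  .option("hoodie.datasource.write.table.type", "MERGE_ON_READ")
  // Options reported above.
  .option("hoodie.datasource.write.keygenerator.class",
    "org.apache.hudi.keygen.NonpartitionedKeyGenerator")
  .option("hoodie.index.type", "GLOBAL_BLOOM")
  .option("hoodie.metadata.enable", "true")
  .option("hoodie.keep.min.commits", "6")
  .option("hoodie.keep.max.commits", "7")
  .option("hoodie.cleaner.commits.retained", "3")
  .option("hoodie.datasource.compaction.async.enable", "true")
  .option("hoodie.write.markers.type", "DIRECT")
  .option("checkpointLocation", "s3://<bucket>/checkpoints/dhdata_15")
  .trigger(Trigger.ProcessingTime("60 seconds"))
  .start("s3://<bucket>/hudi-bloom/dhdata_15")
```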
Stack trace:
```
22/10/19 15:36:18 ERROR UpsertPartitioner: Error trying to compute average bytes/record
org.apache.hudi.exception.HoodieIOException: Could not read commit details from s3://app-util/hudi-bloom/multi-stream-105/cdc_test_db/dhdata_15/.hoodie/20221019152438682.commit
at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.readDataFromPath(HoodieActiveTimeline.java:761)
at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.getInstantDetails(HoodieActiveTimeline.java:266)
at org.apache.hudi.common.table.timeline.HoodieDefaultTimeline.getInstantDetails(HoodieDefaultTimeline.java:372)
at org.apache.hudi.table.action.commit.UpsertPartitioner.averageBytesPerRecord(UpsertPartitioner.java:373)
at org.apache.hudi.table.action.commit.UpsertPartitioner.assignInserts(UpsertPartitioner.java:162)
at org.apache.hudi.table.action.commit.UpsertPartitioner.<init>(UpsertPartitioner.java:95)
at org.apache.hudi.table.action.deltacommit.SparkUpsertDeltaCommitPartitioner.<init>(SparkUpsertDeltaCommitPartitioner.java:50)
at org.apache.hudi.table.action.deltacommit.BaseSparkDeltaCommitActionExecutor.getUpsertPartitioner(BaseSparkDeltaCommitActionExecutor.java:69)
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.getPartitioner(BaseSparkCommitActionExecutor.java:217)
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:163)
at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:85)
at org.apache.hudi.table.action.commit.BaseWriteHelper.write(BaseWriteHelper.java:57)
at org.apache.hudi.table.action.deltacommit.SparkUpsertDeltaCommitActionExecutor.execute(SparkUpsertDeltaCommitActionExecutor.java:46)
at org.apache.hudi.table.HoodieSparkMergeOnReadTable.upsert(HoodieSparkMergeOnReadTable.java:89)
at org.apache.hudi.table.HoodieSparkMergeOnReadTable.upsert(HoodieSparkMergeOnReadTable.java:76)
at org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:157)
at org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:213)
at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:304)
at org.apache.hudi.HoodieStreamingSink.$anonfun$addBatch$2(HoodieStreamingSink.scala:91)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.hudi.HoodieStreamingSink.$anonfun$addBatch$1(HoodieStreamingSink.scala:90)
at org.apache.hudi.HoodieStreamingSink.retry(HoodieStreamingSink.scala:166)
at org.apache.hudi.HoodieStreamingSink.addBatch(HoodieStreamingSink.scala:89)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$17(MicroBatchExecution.scala:600)
at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:110)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:135)
at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:135)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:253)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:134)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$16(MicroBatchExecution.scala:598)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runBatch(MicroBatchExecution.scala:598)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:228)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:193)
at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:57)
at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:187)
at org.apache.spark.sql.execution.streaming.StreamExecution.$anonfun$runStream$1(StreamExecution.scala:303)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:286)
at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:209)
Caused by: java.io.FileNotFoundException: No such file or directory 's3://app-util/hudi-bloom/multi-stream-105/cdc_test_db/dhdata_15/.hoodie/20221019152438682.commit'
at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.getFileStatus(S3NativeFileSystem.java:521)
at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.open(S3NativeFileSystem.java:932)
at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.open(S3NativeFileSystem.java:924)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:906)
at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.open(EmrFileSystem.java:194)
at org.apache.hudi.common.fs.HoodieWrapperFileSystem.open(HoodieWrapperFileSystem.java:460)
at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.readDataFromPath(HoodieActiveTimeline.java:758)
... 52 more
```
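The failure is in UpsertPartitioner.averageBytesPerRecord, which tries to read details of a completed commit whose .commit file is gone from `.hoodie/`. With keep.min.commits=6 / keep.max.commits=7 the archiver trims the active timeline aggressively, so one way to check for that race is to compare the active and archived timelines. A sketch, assuming Hudi ~0.12 timeline APIs and a driver already configured with S3 credentials (the base path is the one from the trace):
```
import org.apache.hadoop.conf.Configuration
import org.apache.hudi.common.table.HoodieTableMetaClient

// Base path taken from the stack trace above; the Configuration must carry
// whatever S3 settings/credentials the cluster normally uses.
val basePath = "s3://app-util/hudi-bloom/multi-stream-105/cdc_test_db/dhdata_15"
val metaClient = HoodieTableMetaClient.builder()
  .setConf(new Configuration())
  .setBasePath(basePath)
  .build()

// Completed commits/deltacommits still on the active timeline.
println("active timeline:")
metaClient.getActiveTimeline.getCommitsTimeline.filterCompletedInstants()
  .getInstants
  .forEach(i => println(s"  ${i.getTimestamp} ${i.getAction} ${i.getState}"))

// Instants already moved to the archive. If 20221019152438682 appears only
// here, the archiver deleted the .commit file while the writer still
// expected to find it on the active timeline.
println("archived timeline:")
metaClient.getArchivedTimeline.getCommitsTimeline
  .getInstants
  .forEach(i => println(s"  archived: ${i.getTimestamp} ${i.getAction}"))
```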