You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Sagar Sumit (Jira)" <ji...@apache.org> on 2023/01/24 02:55:00 UTC
[jira] [Closed] (HUDI-5555) Set class loader for parquet data block
[ https://issues.apache.org/jira/browse/HUDI-5555?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sagar Sumit closed HUDI-5555.
-----------------------------
Resolution: Fixed
> Set class loader for parquet data block
> ---------------------------------------
>
> Key: HUDI-5555
> URL: https://issues.apache.org/jira/browse/HUDI-5555
> Project: Apache Hudi
> Issue Type: Bug
> Components: core
> Reporter: Jonathan Vexler
> Assignee: Jonathan Vexler
> Priority: Blocker
> Labels: pull-request-available
> Fix For: 0.13.0
>
>
> inlineConf.setClassLoader(InLineFileSystem.class.getClassLoader());
> is set in HoodieHFileDataBlock but it is not set in HoodieParquetDataBlock
> This causes
> {code:java}
> java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hudi.common.fs.inline.InLineFileSystem not found at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2667) at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:3431) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3466) at org.apache.hadoop.fs.FileSystem.access$300(FileSystem.java:174) at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3574) at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3521) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:540) at org.apache.hadoop.fs.Path.getFileSystem(Path.java:365) at org.apache.parquet.hadoop.ParquetReader$Builder.build(ParquetReader.java:336) at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:168) at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:99) at org.apache.hudi.io.storage.HoodieAvroFileReaderBase.getRecordIterator(HoodieAvroFileReaderBase.java:39) at org.apache.hudi.io.storage.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:53) at org.apache.hudi.common.table.log.block.HoodieParquetDataBlock.readRecordsFromBlockPayload(HoodieParquetDataBlock.java:162) at org.apache.hudi.common.table.log.block.HoodieDataBlock.getRecordIterator(HoodieDataBlock.java:128) at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.getRecordsIterator(AbstractHoodieLogRecordReader.java:779) at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:641) at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:691) at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:379) at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:231) at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:220) at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:114) at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:107) at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:61) at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:49) at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader$Builder.build(HoodieMetadataMergedLogRecordReader.java:232) at org.apache.hudi.metadata.HoodieBackedTableMetadata.getLogRecordScanner(HoodieBackedTableMetadata.java:528) at org.apache.hudi.metadata.HoodieBackedTableMetadata.openReaders(HoodieBackedTableMetadata.java:438) at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getOrCreateReaders$12(HoodieBackedTableMetadata.java:421) at java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660) at org.apache.hudi.metadata.HoodieBackedTableMetadata.getOrCreateReaders(HoodieBackedTableMetadata.java:421) at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getRecordsByKeys$2(HoodieBackedTableMetadata.java:227) at java.util.HashMap.forEach(HashMap.java:1290) at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordsByKeys(HoodieBackedTableMetadata.java:225) at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordByKey(HoodieBackedTableMetadata.java:148) at org.apache.hudi.metadata.BaseTableMetadata.fetchAllFilesInPartition(BaseTableMetadata.java:327) at org.apache.hudi.metadata.BaseTableMetadata.getAllFilesInPartition(BaseTableMetadata.java:145) at org.apache.hudi.metadata.HoodieMetadataFileSystemView.listPartition(HoodieMetadataFileSystemView.java:65) at org.apache.hudi.common.table.view.AbstractTableFileSystemView.lambda$ensurePartitionLoadedCorrectly$10(AbstractTableFileSystemView.java:311) at java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660) at org.apache.hudi.common.table.view.AbstractTableFileSystemView.ensurePartitionLoadedCorrectly(AbstractTableFileSystemView.java:302) at org.apache.hudi.common.table.view.AbstractTableFileSystemView.getLatestBaseFiles(AbstractTableFileSystemView.java:515) at org.apache.hudi.hadoop.HoodieROTablePathFilter.accept(HoodieROTablePathFilter.java:200) at org.apache.spark.sql.execution.datasources.PathFilterWrapper.accept(InMemoryFileIndex.scala:165) at org.apache.spark.util.HadoopFSUtils$.$anonfun$listLeafFiles$8(HadoopFSUtils.scala:285) at org.apache.spark.util.HadoopFSUtils$.$anonfun$listLeafFiles$8$adapted(HadoopFSUtils.scala:285) at scala.collection.TraversableLike.$anonfun$filterImpl$1(TraversableLike.scala:304) at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198) at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:303) at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:297) at scala.collection.mutable.ArrayOps$ofRef.filterImpl(ArrayOps.scala:198) at scala.collection.TraversableLike.filter(TraversableLike.scala:395) at scala.collection.TraversableLike.filter$(TraversableLike.scala:395) at scala.collection.mutable.ArrayOps$ofRef.filter(ArrayOps.scala:198) at org.apache.spark.util.HadoopFSUtils$.listLeafFiles(HadoopFSUtils.scala:285) at org.apache.spark.util.HadoopFSUtils$.$anonfun$parallelListLeafFilesInternal$1(HadoopFSUtils.scala:95) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at scala.collection.TraversableLike.map(TraversableLike.scala:286) at scala.collection.TraversableLike.map$(TraversableLike.scala:279) at scala.collection.AbstractTraversable.map(Traversable.scala:108) at org.apache.spark.util.HadoopFSUtils$.parallelListLeafFilesInternal(HadoopFSUtils.scala:85) at org.apache.spark.util.HadoopFSUtils$.parallelListLeafFiles(HadoopFSUtils.scala:69) at org.apache.spark.sql.execution.datasources.InMemoryFileIndex$.bulkListLeafFiles(InMemoryFileIndex.scala:158) at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.listLeafFiles(InMemoryFileIndex.scala:131) at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.refresh0(InMemoryFileIndex.scala:94) at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.<init>(InMemoryFileIndex.scala:66) at org.apache.spark.sql.execution.datasources.DataSource.createInMemoryFileIndex(DataSource.scala:565) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:409) at org.apache.hudi.BaseFileOnlyRelation.toHadoopFsRelation(BaseFileOnlyRelation.scala:203) at org.apache.hudi.DefaultSource$.resolveBaseFileOnlyRelation(DefaultSource.scala:277) at org.apache.hudi.DefaultSource$.createRelation(DefaultSource.scala:241) at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:115) at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:72) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:350) at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274) at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:188) at org.apache.hudi.integ.testsuite.dag.nodes.ValidateDatasetNode.getDatasetToValidate(ValidateDatasetNode.java:56) at org.apache.hudi.integ.testsuite.dag.nodes.BaseValidateDatasetNode.execute(BaseValidateDatasetNode.java:116) at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:135) at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:104) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750)Caused by: java.lang.ClassNotFoundException: Class org.apache.hudi.common.fs.inline.InLineFileSystem not found at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2571) at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2665) {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)