Posted to commits@hudi.apache.org by "Jonathan Vexler (Jira)" <ji...@apache.org> on 2023/01/13 21:18:00 UTC

[jira] [Created] (HUDI-5555) Set class loader for parquet data block

Jonathan Vexler created HUDI-5555:
-------------------------------------

             Summary: Set class loader for parquet data block
                 Key: HUDI-5555
                 URL: https://issues.apache.org/jira/browse/HUDI-5555
             Project: Apache Hudi
          Issue Type: Bug
          Components: core
            Reporter: Jonathan Vexler


inlineConf.setClassLoader(InLineFileSystem.class.getClassLoader()); 

is called in HoodieHFileDataBlock, but the equivalent call is missing from HoodieParquetDataBlock.

This causes the following failure:
{code:java}
java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hudi.common.fs.inline.InLineFileSystem not found
  at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2667)
  at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:3431)
  at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3466)
  at org.apache.hadoop.fs.FileSystem.access$300(FileSystem.java:174)
  at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3574)
  at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3521)
  at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:540)
  at org.apache.hadoop.fs.Path.getFileSystem(Path.java:365)
  at org.apache.parquet.hadoop.ParquetReader$Builder.build(ParquetReader.java:336)
  at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIteratorInternal(HoodieAvroParquetReader.java:168)
  at org.apache.hudi.io.storage.HoodieAvroParquetReader.getIndexedRecordIterator(HoodieAvroParquetReader.java:99)
  at org.apache.hudi.io.storage.HoodieAvroFileReaderBase.getRecordIterator(HoodieAvroFileReaderBase.java:39)
  at org.apache.hudi.io.storage.HoodieAvroParquetReader.getRecordIterator(HoodieAvroParquetReader.java:53)
  at org.apache.hudi.common.table.log.block.HoodieParquetDataBlock.readRecordsFromBlockPayload(HoodieParquetDataBlock.java:162)
  at org.apache.hudi.common.table.log.block.HoodieDataBlock.getRecordIterator(HoodieDataBlock.java:128)
  at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.getRecordsIterator(AbstractHoodieLogRecordReader.java:779)
  at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processDataBlock(AbstractHoodieLogRecordReader.java:641)
  at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.processQueuedBlocksForInstant(AbstractHoodieLogRecordReader.java:691)
  at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:379)
  at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scanInternal(AbstractHoodieLogRecordReader.java:231)
  at org.apache.hudi.common.table.log.AbstractHoodieLogRecordReader.scan(AbstractHoodieLogRecordReader.java:220)
  at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.performScan(HoodieMergedLogRecordScanner.java:114)
  at org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner.<init>(HoodieMergedLogRecordScanner.java:107)
  at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:61)
  at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader.<init>(HoodieMetadataMergedLogRecordReader.java:49)
  at org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader$Builder.build(HoodieMetadataMergedLogRecordReader.java:232)
  at org.apache.hudi.metadata.HoodieBackedTableMetadata.getLogRecordScanner(HoodieBackedTableMetadata.java:528)
  at org.apache.hudi.metadata.HoodieBackedTableMetadata.openReaders(HoodieBackedTableMetadata.java:438)
  at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getOrCreateReaders$12(HoodieBackedTableMetadata.java:421)
  at java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)
  at org.apache.hudi.metadata.HoodieBackedTableMetadata.getOrCreateReaders(HoodieBackedTableMetadata.java:421)
  at org.apache.hudi.metadata.HoodieBackedTableMetadata.lambda$getRecordsByKeys$2(HoodieBackedTableMetadata.java:227)
  at java.util.HashMap.forEach(HashMap.java:1290)
  at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordsByKeys(HoodieBackedTableMetadata.java:225)
  at org.apache.hudi.metadata.HoodieBackedTableMetadata.getRecordByKey(HoodieBackedTableMetadata.java:148)
  at org.apache.hudi.metadata.BaseTableMetadata.fetchAllFilesInPartition(BaseTableMetadata.java:327)
  at org.apache.hudi.metadata.BaseTableMetadata.getAllFilesInPartition(BaseTableMetadata.java:145)
  at org.apache.hudi.metadata.HoodieMetadataFileSystemView.listPartition(HoodieMetadataFileSystemView.java:65)
  at org.apache.hudi.common.table.view.AbstractTableFileSystemView.lambda$ensurePartitionLoadedCorrectly$10(AbstractTableFileSystemView.java:311)
  at java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)
  at org.apache.hudi.common.table.view.AbstractTableFileSystemView.ensurePartitionLoadedCorrectly(AbstractTableFileSystemView.java:302)
  at org.apache.hudi.common.table.view.AbstractTableFileSystemView.getLatestBaseFiles(AbstractTableFileSystemView.java:515)
  at org.apache.hudi.hadoop.HoodieROTablePathFilter.accept(HoodieROTablePathFilter.java:200)
  at org.apache.spark.sql.execution.datasources.PathFilterWrapper.accept(InMemoryFileIndex.scala:165)
  at org.apache.spark.util.HadoopFSUtils$.$anonfun$listLeafFiles$8(HadoopFSUtils.scala:285)
  at org.apache.spark.util.HadoopFSUtils$.$anonfun$listLeafFiles$8$adapted(HadoopFSUtils.scala:285)
  at scala.collection.TraversableLike.$anonfun$filterImpl$1(TraversableLike.scala:304)
  at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
  at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
  at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:303)
  at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:297)
  at scala.collection.mutable.ArrayOps$ofRef.filterImpl(ArrayOps.scala:198)
  at scala.collection.TraversableLike.filter(TraversableLike.scala:395)
  at scala.collection.TraversableLike.filter$(TraversableLike.scala:395)
  at scala.collection.mutable.ArrayOps$ofRef.filter(ArrayOps.scala:198)
  at org.apache.spark.util.HadoopFSUtils$.listLeafFiles(HadoopFSUtils.scala:285)
  at org.apache.spark.util.HadoopFSUtils$.$anonfun$parallelListLeafFilesInternal$1(HadoopFSUtils.scala:95)
  at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
  at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
  at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
  at scala.collection.TraversableLike.map(TraversableLike.scala:286)
  at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
  at scala.collection.AbstractTraversable.map(Traversable.scala:108)
  at org.apache.spark.util.HadoopFSUtils$.parallelListLeafFilesInternal(HadoopFSUtils.scala:85)
  at org.apache.spark.util.HadoopFSUtils$.parallelListLeafFiles(HadoopFSUtils.scala:69)
  at org.apache.spark.sql.execution.datasources.InMemoryFileIndex$.bulkListLeafFiles(InMemoryFileIndex.scala:158)
  at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.listLeafFiles(InMemoryFileIndex.scala:131)
  at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.refresh0(InMemoryFileIndex.scala:94)
  at org.apache.spark.sql.execution.datasources.InMemoryFileIndex.<init>(InMemoryFileIndex.scala:66)
  at org.apache.spark.sql.execution.datasources.DataSource.createInMemoryFileIndex(DataSource.scala:565)
  at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:409)
  at org.apache.hudi.BaseFileOnlyRelation.toHadoopFsRelation(BaseFileOnlyRelation.scala:203)
  at org.apache.hudi.DefaultSource$.resolveBaseFileOnlyRelation(DefaultSource.scala:277)
  at org.apache.hudi.DefaultSource$.createRelation(DefaultSource.scala:241)
  at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:115)
  at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:72)
  at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:350)
  at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274)
  at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245)
  at scala.Option.getOrElse(Option.scala:189)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:188)
  at org.apache.hudi.integ.testsuite.dag.nodes.ValidateDatasetNode.getDatasetToValidate(ValidateDatasetNode.java:56)
  at org.apache.hudi.integ.testsuite.dag.nodes.BaseValidateDatasetNode.execute(BaseValidateDatasetNode.java:116)
  at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:135)
  at org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:104)
  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.ClassNotFoundException: Class org.apache.hudi.common.fs.inline.InLineFileSystem not found
  at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2571)
  at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2665)
{code}
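A minimal sketch of the likely fix, mirroring the inline-filesystem setup that HoodieHFileDataBlock already performs. The class and method names below (InlineConfSketch, buildInlineConf) are illustrative, not the exact Hudi source; only the setClassLoader call is the missing piece, and the use of InLineFileSystem.SCHEME assumes the same constant the HFile block references.

{code:java}
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.fs.inline.InLineFileSystem;

// Illustrative sketch, not the exact Hudi source.
public class InlineConfSketch {

  static Configuration buildInlineConf(Configuration hadoopConf) {
    Configuration inlineConf = new Configuration(hadoopConf);
    // Register InLineFileSystem as the implementation for its scheme,
    // as HoodieHFileDataBlock does before reading block content.
    inlineConf.set("fs." + InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName());
    // The call missing from HoodieParquetDataBlock: without it, Hadoop's
    // Configuration#getClass resolves InLineFileSystem with a class loader
    // that cannot see Hudi classes, producing the ClassNotFoundException
    // shown in the stack trace above.
    inlineConf.setClassLoader(InLineFileSystem.class.getClassLoader());
    return inlineConf;
  }
}
{code}

Passing a configuration built this way to the ParquetReader in HoodieParquetDataBlock#readRecordsFromBlockPayload should let FileSystem.get resolve the inline scheme regardless of the thread's context class loader.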



--
This message was sent by Atlassian Jira
(v8.20.10#820010)