You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Sagar Sumit (Jira)" <ji...@apache.org> on 2022/09/09 11:59:00 UTC

[jira] [Updated] (HUDI-4821) Presto query for bootstrapped table fails due to IOException

     [ https://issues.apache.org/jira/browse/HUDI-4821?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Sagar Sumit updated HUDI-4821:
------------------------------
    Sprint: 2022/09/05

> Presto query for bootstrapped table fails due to IOException
> ------------------------------------------------------------
>
>                 Key: HUDI-4821
>                 URL: https://issues.apache.org/jira/browse/HUDI-4821
>             Project: Apache Hudi
>          Issue Type: Task
>            Reporter: Sagar Sumit
>            Assignee: Sagar Sumit
>            Priority: Blocker
>             Fix For: 0.12.1
>
>
> Create a partitioned COW bootstrapped table, then query it using Presto.
> Schema:
>  
> {code:java}
> presto:default> show create table hudi_bootstrap_partition_wh;
>                                               Create Table
> --------------------------------------------------------------------------------------------------------
>  CREATE TABLE hive.default.hudi_bootstrap_partition_wh (
>     "_hoodie_commit_time" varchar,
>     "_hoodie_commit_seqno" varchar,
>     "_hoodie_record_key" varchar,
>     "_hoodie_partition_path" varchar,
>     "_hoodie_file_name" varchar,
>     "vendorid" integer,
>     "tpep_dropoff_datetime" varchar,
>     "passenger_count" integer,
>     "trip_distance" double,
>     "ratecodeid" integer,
>     "store_and_fwd_flag" varchar,
>     "pulocationid" integer,
>     "dolocationid" integer,
>     "payment_type" integer,
>     "fare_amount" double,
>     "extra" double,
>     "mta_tax" double,
>     "tip_amount" double,
>     "tolls_amount" double,
>     "improvement_surcharge" double,
>     "total_amount" double,
>     "congestion_surcharge" double,
>     "date_col" varchar,
>     "id" varchar,
>     "tpep_pickup_datetime" varchar
>  )
>  WITH (
>     external_location = 'hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata',
>     format = 'PARQUET',
>     partitioned_by = ARRAY['tpep_pickup_datetime']
>  ){code}
> Query:
> {code:java}
> presto:default> select count(*) from hudi_bootstrap_partition_wh;
> Query 20220909_071836_00006_8kmzy, FAILED, 1 node
> Splits: 17 total, 0 done (0.00%)
> 0:03 [0 rows, 0B] [0 rows/s, 0B/s]
> Query 20220909_071836_00006_8kmzy failed: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
> {code}
> Exception:
> {code:java}
> com.facebook.presto.spi.PrestoException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
> 	at com.facebook.presto.hive.BackgroundHiveSplitLoader$HiveSplitLoaderTask.process(BackgroundHiveSplitLoader.java:128)
> 	at com.facebook.presto.hive.util.ResumableTasks.safeProcessTask(ResumableTasks.java:47)
> 	at com.facebook.presto.hive.util.ResumableTasks.access$000(ResumableTasks.java:20)
> 	at com.facebook.presto.hive.util.ResumableTasks$1.run(ResumableTasks.java:35)
> 	at com.facebook.airlift.concurrent.BoundedExecutor.drainQueue(BoundedExecutor.java:78)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> 	at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.hudi.exception.HoodieIOException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
> 	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:185)
> 	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.access$400(HFileBootstrapIndex.java:78)
> 	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.partitionIndexReader(HFileBootstrapIndex.java:272)
> 	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.fetchBootstrapIndexInfo(HFileBootstrapIndex.java:262)
> 	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.initIndexInfo(HFileBootstrapIndex.java:252)
> 	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.<init>(HFileBootstrapIndex.java:243)
> 	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:191)
> 	at org.apache.hudi.common.table.view.AbstractTableFileSystemView.lambda$addFilesToView$2(AbstractTableFileSystemView.java:138)
> 	at java.util.HashMap.forEach(HashMap.java:1289)
> 	at org.apache.hudi.common.table.view.AbstractTableFileSystemView.addFilesToView(AbstractTableFileSystemView.java:135)
> 	at org.apache.hudi.common.table.view.HoodieTableFileSystemView.<init>(HoodieTableFileSystemView.java:167)
> 	at org.apache.hudi.BaseHoodieTableFileIndex.doRefresh(BaseHoodieTableFileIndex.java:281)
> 	at org.apache.hudi.BaseHoodieTableFileIndex.<init>(BaseHoodieTableFileIndex.java:140)
> 	at org.apache.hudi.hadoop.HiveHoodieTableFileIndex.<init>(HiveHoodieTableFileIndex.java:49)
> 	at org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatusForSnapshotMode(HoodieCopyOnWriteTableInputFormat.java:239)
> 	at org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatus(HoodieCopyOnWriteTableInputFormat.java:146)
> 	at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
> 	at org.apache.hudi.hadoop.HoodieParquetInputFormatBase.getSplits(HoodieParquetInputFormatBase.java:68)
> 	at com.facebook.presto.hive.StoragePartitionLoader.loadPartition(StoragePartitionLoader.java:258)
> 	at com.facebook.presto.hive.DelegatingPartitionLoader.loadPartition(DelegatingPartitionLoader.java:78)
> 	at com.facebook.presto.hive.BackgroundHiveSplitLoader.loadSplits(BackgroundHiveSplitLoader.java:192)
> 	at com.facebook.presto.hive.BackgroundHiveSplitLoader.access$300(BackgroundHiveSplitLoader.java:40)
> 	at com.facebook.presto.hive.BackgroundHiveSplitLoader$HiveSplitLoaderTask.process(BackgroundHiveSplitLoader.java:121)
> 	... 7 more
> Caused by: org.apache.hudi.org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
> 	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:394)
> 	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFile.createReader(HFile.java:558)
> 	at org.apache.hudi.io.storage.HoodieHFileUtils.createHFileReader(HoodieHFileUtils.java:56)
> 	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:183)
> 	... 29 more
> Caused by: java.lang.UnsupportedOperationException: readDirect unsupported in RemoteBlockReader
> 	at org.apache.hadoop.hdfs.RemoteBlockReader.read(RemoteBlockReader.java:492)
> 	at org.apache.hadoop.hdfs.DFSInputStream$ByteBufferStrategy.doRead(DFSInputStream.java:789)
> 	at org.apache.hadoop.hdfs.DFSInputStream.readBuffer(DFSInputStream.java:823)
> 	at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:883)
> 	at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:938)
> 	at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
> 	at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
> 	at org.apache.hudi.common.fs.TimedFSDataInputStream.lambda$read$0(TimedFSDataInputStream.java:46)
> 	at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:106)
> 	at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeAndByteMetrics(HoodieWrapperFileSystem.java:124)
> 	at org.apache.hudi.common.fs.TimedFSDataInputStream.read(TimedFSDataInputStream.java:45)
> 	at org.apache.hudi.org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:80)
> 	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1506)
> 	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1723)
> 	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1550)
> 	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlock(HFileBlock.java:1448)
> 	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlockWithBlockType(HFileBlock.java:1461)
> 	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:374)
> 	... 32 more {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)