You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Sagar Sumit (Jira)" <ji...@apache.org> on 2022/09/09 11:59:00 UTC
[jira] [Updated] (HUDI-4821) Presto query for bootstrapped table fails due to IOException
[ https://issues.apache.org/jira/browse/HUDI-4821?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sagar Sumit updated HUDI-4821:
------------------------------
Sprint: 2022/09/05
> Presto query for bootstrapped table fails due to IOException
> ------------------------------------------------------------
>
> Key: HUDI-4821
> URL: https://issues.apache.org/jira/browse/HUDI-4821
> Project: Apache Hudi
> Issue Type: Task
> Reporter: Sagar Sumit
> Assignee: Sagar Sumit
> Priority: Blocker
> Fix For: 0.12.1
>
>
> Steps to reproduce: create a partitioned COW (copy-on-write) bootstrapped table, then query it using Presto.
> Schema:
>
> {code:java}
> presto:default> show create table hudi_bootstrap_partition_wh;
> Create Table
> --------------------------------------------------------------------------------------------------------
> CREATE TABLE hive.default.hudi_bootstrap_partition_wh (
> "_hoodie_commit_time" varchar,
> "_hoodie_commit_seqno" varchar,
> "_hoodie_record_key" varchar,
> "_hoodie_partition_path" varchar,
> "_hoodie_file_name" varchar,
> "vendorid" integer,
> "tpep_dropoff_datetime" varchar,
> "passenger_count" integer,
> "trip_distance" double,
> "ratecodeid" integer,
> "store_and_fwd_flag" varchar,
> "pulocationid" integer,
> "dolocationid" integer,
> "payment_type" integer,
> "fare_amount" double,
> "extra" double,
> "mta_tax" double,
> "tip_amount" double,
> "tolls_amount" double,
> "improvement_surcharge" double,
> "total_amount" double,
> "congestion_surcharge" double,
> "date_col" varchar,
> "id" varchar,
> "tpep_pickup_datetime" varchar
> )
> WITH (
> external_location = 'hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata',
> format = 'PARQUET',
> partitioned_by = ARRAY['tpep_pickup_datetime']
> ){code}
> Query:
> {code:java}
> presto:default> select count(*) from hudi_bootstrap_partition_wh;
> Query 20220909_071836_00006_8kmzy, FAILED, 1 node
> Splits: 17 total, 0 done (0.00%)
> 0:03 [0 rows, 0B] [0 rows/s, 0B/s]
> Query 20220909_071836_00006_8kmzy failed: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile {code}
> Exception:
> {code:java}
> com.facebook.presto.spi.PrestoException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
> at com.facebook.presto.hive.BackgroundHiveSplitLoader$HiveSplitLoaderTask.process(BackgroundHiveSplitLoader.java:128)
> at com.facebook.presto.hive.util.ResumableTasks.safeProcessTask(ResumableTasks.java:47)
> at com.facebook.presto.hive.util.ResumableTasks.access$000(ResumableTasks.java:20)
> at com.facebook.presto.hive.util.ResumableTasks$1.run(ResumableTasks.java:35)
> at com.facebook.airlift.concurrent.BoundedExecutor.drainQueue(BoundedExecutor.java:78)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: org.apache.hudi.exception.HoodieIOException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
> at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:185)
> at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.access$400(HFileBootstrapIndex.java:78)
> at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.partitionIndexReader(HFileBootstrapIndex.java:272)
> at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.fetchBootstrapIndexInfo(HFileBootstrapIndex.java:262)
> at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.initIndexInfo(HFileBootstrapIndex.java:252)
> at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.<init>(HFileBootstrapIndex.java:243)
> at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:191)
> at org.apache.hudi.common.table.view.AbstractTableFileSystemView.lambda$addFilesToView$2(AbstractTableFileSystemView.java:138)
> at java.util.HashMap.forEach(HashMap.java:1289)
> at org.apache.hudi.common.table.view.AbstractTableFileSystemView.addFilesToView(AbstractTableFileSystemView.java:135)
> at org.apache.hudi.common.table.view.HoodieTableFileSystemView.<init>(HoodieTableFileSystemView.java:167)
> at org.apache.hudi.BaseHoodieTableFileIndex.doRefresh(BaseHoodieTableFileIndex.java:281)
> at org.apache.hudi.BaseHoodieTableFileIndex.<init>(BaseHoodieTableFileIndex.java:140)
> at org.apache.hudi.hadoop.HiveHoodieTableFileIndex.<init>(HiveHoodieTableFileIndex.java:49)
> at org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatusForSnapshotMode(HoodieCopyOnWriteTableInputFormat.java:239)
> at org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatus(HoodieCopyOnWriteTableInputFormat.java:146)
> at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
> at org.apache.hudi.hadoop.HoodieParquetInputFormatBase.getSplits(HoodieParquetInputFormatBase.java:68)
> at com.facebook.presto.hive.StoragePartitionLoader.loadPartition(StoragePartitionLoader.java:258)
> at com.facebook.presto.hive.DelegatingPartitionLoader.loadPartition(DelegatingPartitionLoader.java:78)
> at com.facebook.presto.hive.BackgroundHiveSplitLoader.loadSplits(BackgroundHiveSplitLoader.java:192)
> at com.facebook.presto.hive.BackgroundHiveSplitLoader.access$300(BackgroundHiveSplitLoader.java:40)
> at com.facebook.presto.hive.BackgroundHiveSplitLoader$HiveSplitLoaderTask.process(BackgroundHiveSplitLoader.java:121)
> ... 7 more
> Caused by: org.apache.hudi.org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:394)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFile.createReader(HFile.java:558)
> at org.apache.hudi.io.storage.HoodieHFileUtils.createHFileReader(HoodieHFileUtils.java:56)
> at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:183)
> ... 29 more
> Caused by: java.lang.UnsupportedOperationException: readDirect unsupported in RemoteBlockReader
> at org.apache.hadoop.hdfs.RemoteBlockReader.read(RemoteBlockReader.java:492)
> at org.apache.hadoop.hdfs.DFSInputStream$ByteBufferStrategy.doRead(DFSInputStream.java:789)
> at org.apache.hadoop.hdfs.DFSInputStream.readBuffer(DFSInputStream.java:823)
> at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:883)
> at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:938)
> at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
> at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
> at org.apache.hudi.common.fs.TimedFSDataInputStream.lambda$read$0(TimedFSDataInputStream.java:46)
> at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:106)
> at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeAndByteMetrics(HoodieWrapperFileSystem.java:124)
> at org.apache.hudi.common.fs.TimedFSDataInputStream.read(TimedFSDataInputStream.java:45)
> at org.apache.hudi.org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:80)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1506)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1723)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1550)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlock(HFileBlock.java:1448)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlockWithBlockType(HFileBlock.java:1461)
> at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:374)
> ... 32 more {code}
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)