You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Sagar Sumit (Jira)" <ji...@apache.org> on 2022/09/09 07:40:00 UTC
[jira] [Created] (HUDI-4821) Presto query for bootstrapped table fails due to IOException
Sagar Sumit created HUDI-4821:
---------------------------------
Summary: Presto query for bootstrapped table fails due to IOException
Key: HUDI-4821
URL: https://issues.apache.org/jira/browse/HUDI-4821
Project: Apache Hudi
Issue Type: Task
Reporter: Sagar Sumit
Fix For: 0.12.1
Steps to reproduce: create a partitioned copy-on-write (COW) bootstrapped table, then query it using Presto. The query fails with the exception shown below.
Schema:
{code:java}
presto:default> show create table hudi_bootstrap_partition_wh;
Create Table
--------------------------------------------------------------------------------------------------------
CREATE TABLE hive.default.hudi_bootstrap_partition_wh (
"_hoodie_commit_time" varchar,
"_hoodie_commit_seqno" varchar,
"_hoodie_record_key" varchar,
"_hoodie_partition_path" varchar,
"_hoodie_file_name" varchar,
"vendorid" integer,
"tpep_dropoff_datetime" varchar,
"passenger_count" integer,
"trip_distance" double,
"ratecodeid" integer,
"store_and_fwd_flag" varchar,
"pulocationid" integer,
"dolocationid" integer,
"payment_type" integer,
"fare_amount" double,
"extra" double,
"mta_tax" double,
"tip_amount" double,
"tolls_amount" double,
"improvement_surcharge" double,
"total_amount" double,
"congestion_surcharge" double,
"date_col" varchar,
"id" varchar,
"tpep_pickup_datetime" varchar
)
WITH (
external_location = 'hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata',
format = 'PARQUET',
partitioned_by = ARRAY['tpep_pickup_datetime']
)
{code}
Query:
{code:java}
presto:default> select count(*) from hudi_bootstrap_partition_wh;
Query 20220909_071836_00006_8kmzy, FAILED, 1 node
Splits: 17 total, 0 done (0.00%)
0:03 [0 rows, 0B] [0 rows/s, 0B/s]
Query 20220909_071836_00006_8kmzy failed: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
{code}
Exception:
{code:java}
com.facebook.presto.spi.PrestoException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
at com.facebook.presto.hive.BackgroundHiveSplitLoader$HiveSplitLoaderTask.process(BackgroundHiveSplitLoader.java:128)
at com.facebook.presto.hive.util.ResumableTasks.safeProcessTask(ResumableTasks.java:47)
at com.facebook.presto.hive.util.ResumableTasks.access$000(ResumableTasks.java:20)
at com.facebook.presto.hive.util.ResumableTasks$1.run(ResumableTasks.java:35)
at com.facebook.airlift.concurrent.BoundedExecutor.drainQueue(BoundedExecutor.java:78)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hudi.exception.HoodieIOException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:185)
at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.access$400(HFileBootstrapIndex.java:78)
at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.partitionIndexReader(HFileBootstrapIndex.java:272)
at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.fetchBootstrapIndexInfo(HFileBootstrapIndex.java:262)
at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.initIndexInfo(HFileBootstrapIndex.java:252)
at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.<init>(HFileBootstrapIndex.java:243)
at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:191)
at org.apache.hudi.common.table.view.AbstractTableFileSystemView.lambda$addFilesToView$2(AbstractTableFileSystemView.java:138)
at java.util.HashMap.forEach(HashMap.java:1289)
at org.apache.hudi.common.table.view.AbstractTableFileSystemView.addFilesToView(AbstractTableFileSystemView.java:135)
at org.apache.hudi.common.table.view.HoodieTableFileSystemView.<init>(HoodieTableFileSystemView.java:167)
at org.apache.hudi.BaseHoodieTableFileIndex.doRefresh(BaseHoodieTableFileIndex.java:281)
at org.apache.hudi.BaseHoodieTableFileIndex.<init>(BaseHoodieTableFileIndex.java:140)
at org.apache.hudi.hadoop.HiveHoodieTableFileIndex.<init>(HiveHoodieTableFileIndex.java:49)
at org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatusForSnapshotMode(HoodieCopyOnWriteTableInputFormat.java:239)
at org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatus(HoodieCopyOnWriteTableInputFormat.java:146)
at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
at org.apache.hudi.hadoop.HoodieParquetInputFormatBase.getSplits(HoodieParquetInputFormatBase.java:68)
at com.facebook.presto.hive.StoragePartitionLoader.loadPartition(StoragePartitionLoader.java:258)
at com.facebook.presto.hive.DelegatingPartitionLoader.loadPartition(DelegatingPartitionLoader.java:78)
at com.facebook.presto.hive.BackgroundHiveSplitLoader.loadSplits(BackgroundHiveSplitLoader.java:192)
at com.facebook.presto.hive.BackgroundHiveSplitLoader.access$300(BackgroundHiveSplitLoader.java:40)
at com.facebook.presto.hive.BackgroundHiveSplitLoader$HiveSplitLoaderTask.process(BackgroundHiveSplitLoader.java:121)
... 7 more
Caused by: org.apache.hudi.org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:394)
at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFile.createReader(HFile.java:558)
at org.apache.hudi.io.storage.HoodieHFileUtils.createHFileReader(HoodieHFileUtils.java:56)
at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:183)
... 29 more
Caused by: java.lang.UnsupportedOperationException: readDirect unsupported in RemoteBlockReader
at org.apache.hadoop.hdfs.RemoteBlockReader.read(RemoteBlockReader.java:492)
at org.apache.hadoop.hdfs.DFSInputStream$ByteBufferStrategy.doRead(DFSInputStream.java:789)
at org.apache.hadoop.hdfs.DFSInputStream.readBuffer(DFSInputStream.java:823)
at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:883)
at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:938)
at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
at org.apache.hudi.common.fs.TimedFSDataInputStream.lambda$read$0(TimedFSDataInputStream.java:46)
at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:106)
at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeAndByteMetrics(HoodieWrapperFileSystem.java:124)
at org.apache.hudi.common.fs.TimedFSDataInputStream.read(TimedFSDataInputStream.java:45)
at org.apache.hudi.org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:80)
at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1506)
at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1723)
at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1550)
at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlock(HFileBlock.java:1448)
at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlockWithBlockType(HFileBlock.java:1461)
at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:374)
... 32 more
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)