You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Sagar Sumit (Jira)" <ji...@apache.org> on 2022/09/09 07:40:00 UTC

[jira] [Created] (HUDI-4821) Presto query for bootstrapped table fails due to IOException

Sagar Sumit created HUDI-4821:
---------------------------------

             Summary: Presto query for bootstrapped table fails due to IOException
                 Key: HUDI-4821
                 URL: https://issues.apache.org/jira/browse/HUDI-4821
             Project: Apache Hudi
          Issue Type: Task
            Reporter: Sagar Sumit
             Fix For: 0.12.1


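Steps to reproduce: create a partitioned copy-on-write (COW) bootstrapped table, then query it using Presto. The query fails with an IOException while reading the bootstrap index HFile (details below).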

Schema:

 
{code:java}
presto:default> show create table hudi_bootstrap_partition_wh;
                                              Create Table
--------------------------------------------------------------------------------------------------------
 CREATE TABLE hive.default.hudi_bootstrap_partition_wh (
    "_hoodie_commit_time" varchar,
    "_hoodie_commit_seqno" varchar,
    "_hoodie_record_key" varchar,
    "_hoodie_partition_path" varchar,
    "_hoodie_file_name" varchar,
    "vendorid" integer,
    "tpep_dropoff_datetime" varchar,
    "passenger_count" integer,
    "trip_distance" double,
    "ratecodeid" integer,
    "store_and_fwd_flag" varchar,
    "pulocationid" integer,
    "dolocationid" integer,
    "payment_type" integer,
    "fare_amount" double,
    "extra" double,
    "mta_tax" double,
    "tip_amount" double,
    "tolls_amount" double,
    "improvement_surcharge" double,
    "total_amount" double,
    "congestion_surcharge" double,
    "date_col" varchar,
    "id" varchar,
    "tpep_pickup_datetime" varchar
 )
 WITH (
    external_location = 'hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata',
    format = 'PARQUET',
    partitioned_by = ARRAY['tpep_pickup_datetime']
 ){code}
Query:
{code:java}
presto:default> select count(*) from hudi_bootstrap_partition_wh;
Query 20220909_071836_00006_8kmzy, FAILED, 1 node
Splits: 17 total, 0 done (0.00%)
0:03 [0 rows, 0B] [0 rows/s, 0B/s]
Query 20220909_071836_00006_8kmzy failed: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
{code}
Exception:
{code:java}
com.facebook.presto.spi.PrestoException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
	at com.facebook.presto.hive.BackgroundHiveSplitLoader$HiveSplitLoaderTask.process(BackgroundHiveSplitLoader.java:128)
	at com.facebook.presto.hive.util.ResumableTasks.safeProcessTask(ResumableTasks.java:47)
	at com.facebook.presto.hive.util.ResumableTasks.access$000(ResumableTasks.java:20)
	at com.facebook.presto.hive.util.ResumableTasks$1.run(ResumableTasks.java:35)
	at com.facebook.airlift.concurrent.BoundedExecutor.drainQueue(BoundedExecutor.java:78)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hudi.exception.HoodieIOException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:185)
	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.access$400(HFileBootstrapIndex.java:78)
	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.partitionIndexReader(HFileBootstrapIndex.java:272)
	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.fetchBootstrapIndexInfo(HFileBootstrapIndex.java:262)
	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.initIndexInfo(HFileBootstrapIndex.java:252)
	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex$HFileBootstrapIndexReader.<init>(HFileBootstrapIndex.java:243)
	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:191)
	at org.apache.hudi.common.table.view.AbstractTableFileSystemView.lambda$addFilesToView$2(AbstractTableFileSystemView.java:138)
	at java.util.HashMap.forEach(HashMap.java:1289)
	at org.apache.hudi.common.table.view.AbstractTableFileSystemView.addFilesToView(AbstractTableFileSystemView.java:135)
	at org.apache.hudi.common.table.view.HoodieTableFileSystemView.<init>(HoodieTableFileSystemView.java:167)
	at org.apache.hudi.BaseHoodieTableFileIndex.doRefresh(BaseHoodieTableFileIndex.java:281)
	at org.apache.hudi.BaseHoodieTableFileIndex.<init>(BaseHoodieTableFileIndex.java:140)
	at org.apache.hudi.hadoop.HiveHoodieTableFileIndex.<init>(HiveHoodieTableFileIndex.java:49)
	at org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatusForSnapshotMode(HoodieCopyOnWriteTableInputFormat.java:239)
	at org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat.listStatus(HoodieCopyOnWriteTableInputFormat.java:146)
	at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
	at org.apache.hudi.hadoop.HoodieParquetInputFormatBase.getSplits(HoodieParquetInputFormatBase.java:68)
	at com.facebook.presto.hive.StoragePartitionLoader.loadPartition(StoragePartitionLoader.java:258)
	at com.facebook.presto.hive.DelegatingPartitionLoader.loadPartition(DelegatingPartitionLoader.java:78)
	at com.facebook.presto.hive.BackgroundHiveSplitLoader.loadSplits(BackgroundHiveSplitLoader.java:192)
	at com.facebook.presto.hive.BackgroundHiveSplitLoader.access$300(BackgroundHiveSplitLoader.java:40)
	at com.facebook.presto.hive.BackgroundHiveSplitLoader$HiveSplitLoaderTask.process(BackgroundHiveSplitLoader.java:121)
	... 7 more
Caused by: org.apache.hudi.org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading data index and meta index from file hdfs://namenode:8020/user/hive/warehouse/hudi_bootstrap/partitioned_metadata/.hoodie/.aux/.bootstrap/.partitions/00000000-0000-0000-0000-000000000000-0_1-0-1_00000000000001.hfile
	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:394)
	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFile.createReader(HFile.java:558)
	at org.apache.hudi.io.storage.HoodieHFileUtils.createHFileReader(HoodieHFileUtils.java:56)
	at org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex.createReader(HFileBootstrapIndex.java:183)
	... 29 more
Caused by: java.lang.UnsupportedOperationException: readDirect unsupported in RemoteBlockReader
	at org.apache.hadoop.hdfs.RemoteBlockReader.read(RemoteBlockReader.java:492)
	at org.apache.hadoop.hdfs.DFSInputStream$ByteBufferStrategy.doRead(DFSInputStream.java:789)
	at org.apache.hadoop.hdfs.DFSInputStream.readBuffer(DFSInputStream.java:823)
	at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:883)
	at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:938)
	at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
	at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:143)
	at org.apache.hudi.common.fs.TimedFSDataInputStream.lambda$read$0(TimedFSDataInputStream.java:46)
	at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:106)
	at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeAndByteMetrics(HoodieWrapperFileSystem.java:124)
	at org.apache.hudi.common.fs.TimedFSDataInputStream.read(TimedFSDataInputStream.java:45)
	at org.apache.hudi.org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:80)
	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1506)
	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1723)
	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1550)
	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlock(HFileBlock.java:1448)
	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlockWithBlockType(HFileBlock.java:1461)
	at org.apache.hudi.org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:374)
	... 32 more {code}
 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)