You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "Jing Zhang (Jira)" <ji...@apache.org> on 2023/02/27 07:51:00 UTC
[jira] [Created] (HUDI-5857) Snapshot query result is wrong after apply insert overwrite to an existed table with simple bucket index
Jing Zhang created HUDI-5857:
--------------------------------
Summary: Snapshot query result is wrong after apply insert overwrite to an existed table with simple bucket index
Key: HUDI-5857
URL: https://issues.apache.org/jira/browse/HUDI-5857
Project: Apache Hudi
Issue Type: Bug
Reporter: Jing Zhang
The snapshot query result is wrong after applying insert overwrite to an existing table that uses the simple bucket index.
The bug can be reproduced with the following steps.
# create a mor table with bucket index
{code:java}
-- Repro table: a Hudi table of type 'mor', partitioned by dt and keyed on id,
-- configured for the bucket index with 8 buckets.
-- Note: the provider clause is `using hudi`; `partitioned by (dt)` is a separate clause.
create table test_hudi_zj0221 (
    id int,
    name string,
    price double,
    ts long,
    dt string
) using hudi
partitioned by (dt)
options (
    type = 'mor',
    primaryKey = 'id',
    preCombineField = 'ts',
    'hoodie.index.type' = 'BUCKET',
    'hoodie.storage.layout.partitioner.class' = 'org.apache.hudi.table.action.commit.SparkBucketIndexPartitioner',
    'hoodie.bucket.index.num.buckets' = '8',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.storage.layout.type' = 'BUCKET'
)
{code}
# insert data into the table
{code:java}
-- Seed the dt='2021-05-05' partition with eight rows (id 8 through 15).
-- Each row is written by its own insert statement, exactly as in the original
-- report; do not merge them into one statement.
-- NOTE(review): presumably one write per record matters for the repro — confirm.
insert into test_hudi_zj0221
select 8 as id, 'hudi3' as name, 30 as price, 3000 as ts, '2021-05-05' as dt;

insert into test_hudi_zj0221
select 9 as id, 'hudi3' as name, 30 as price, 3000 as ts, '2021-05-05' as dt;

insert into test_hudi_zj0221
select 10 as id, 'hudi3' as name, 30 as price, 3000 as ts, '2021-05-05' as dt;

insert into test_hudi_zj0221
select 11 as id, 'hudi3' as name, 30 as price, 3000 as ts, '2021-05-05' as dt;

insert into test_hudi_zj0221
select 12 as id, 'hudi3' as name, 30 as price, 3000 as ts, '2021-05-05' as dt;

insert into test_hudi_zj0221
select 13 as id, 'hudi3' as name, 30 as price, 3000 as ts, '2021-05-05' as dt;

insert into test_hudi_zj0221
select 14 as id, 'hudi3' as name, 30 as price, 3000 as ts, '2021-05-05' as dt;

insert into test_hudi_zj0221
select 15 as id, 'hudi3' as name, 30 as price, 3000 as ts, '2021-05-05' as dt;
{code}
# on finding something wrong with the data, use insert overwrite to overwrite the partition
{code:java}
-- Overwrite the dt='2021-05-05' partition with a single row.
-- The select list is positional: id, name, price, ts (dt comes from the partition spec).
insert overwrite table test_hudi_zj0221
partition (dt = '2021-05-05')
select 2222, 'a2', 30, 3000;
{code}
# snapshot query on the table
{code:java}
-- Snapshot query limited to the overwritten partition.
select *
from test_hudi_zj0221
where dt = '2021-05-05';

-- or: the same snapshot query over the whole table
select *
from test_hudi_zj0221;
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)