You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Sergey Shelukhin (JIRA)" <ji...@apache.org> on 2016/10/14 23:23:20 UTC

[jira] [Created] (HIVE-14970) repeated insert into is broken for buckets (incorrect results)

Sergey Shelukhin created HIVE-14970:
---------------------------------------

             Summary: repeated insert into is broken for buckets (incorrect results)
                 Key: HIVE-14970
                 URL: https://issues.apache.org/jira/browse/HIVE-14970
             Project: Hive
          Issue Type: Bug
            Reporter: Sergey Shelukhin
            Priority: Critical


{noformat}
CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;

insert into table src_bucket select key,value from srcpart limit 10;

dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/src_bucket/;

select *, INPUT__FILE__NAME from src_bucket;
select * from src_bucket tablesample (bucket 1 out of 2) s;
select * from src_bucket tablesample (bucket 2 out of 2) s;

insert into table src_bucket select key,value from srcpart limit 10;

dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/src_bucket/;

select *, INPUT__FILE__NAME from src_bucket;
select * from src_bucket tablesample (bucket 1 out of 2) s;
select * from src_bucket tablesample (bucket 2 out of 2) s;
{noformat}

Results in the following (with masking disabled and grepping away the noise).
Looks like bucket mapping completely breaks due to extra files, which may have implications for all the optimizations that depend on them.
This should work or at least fail if this is not supported.
{noformat}
PREHOOK: query: CREATE TABLE src_bucket(key STRING, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
PREHOOK: query: insert into table src_bucket select key,value from srcpart limit 10
Found 2 items
-rwxr-xr-x   1 sergey staff         46 2016-10-14 16:09 pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
-rwxr-xr-x   1 sergey staff         68 2016-10-14 16:09 pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
PREHOOK: query: select *, INPUT__FILE__NAME from src_bucket
165	val_165	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
255	val_255	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
484	val_484	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
86	val_86	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
238	val_238	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
27	val_27	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
278	val_278	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
311	val_311	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
409	val_409	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
98	val_98	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
PREHOOK: query: select * from src_bucket tablesample (bucket 1 out of 2) s
165	val_165
255	val_255
484	val_484
86	val_86
PREHOOK: query: select * from src_bucket tablesample (bucket 2 out of 2) s
238	val_238
27	val_27
278	val_278
311	val_311
409	val_409
98	val_98
PREHOOK: query: insert into table src_bucket select key,value from srcpart limit 10
Found 4 items
-rwxr-xr-x   1 sergey staff         46 2016-10-14 16:09 pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
-rwxr-xr-x   1 sergey staff         46 2016-10-14 16:09 pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0_copy_1
-rwxr-xr-x   1 sergey staff         68 2016-10-14 16:09 pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
-rwxr-xr-x   1 sergey staff         68 2016-10-14 16:09 pfile:///Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0_copy_1
PREHOOK: query: select *, INPUT__FILE__NAME from src_bucket
165	val_165	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
255	val_255	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
484	val_484	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
86	val_86	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0
165	val_165	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0_copy_1
255	val_255	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0_copy_1
484	val_484	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0_copy_1
86	val_86	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000000_0_copy_1
238	val_238	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
27	val_27	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
278	val_278	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
311	val_311	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
409	val_409	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
98	val_98	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0
238	val_238	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0_copy_1
27	val_27	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0_copy_1
278	val_278	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0_copy_1
311	val_311	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0_copy_1
409	val_409	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0_copy_1
98	val_98	pfile:/Users/sergey/git/hive/itests/qtest/target/warehouse/src_bucket/000001_0_copy_1
PREHOOK: query: select * from src_bucket tablesample (bucket 1 out of 2) s
165	val_165
255	val_255
484	val_484
86	val_86
PREHOOK: query: select * from src_bucket tablesample (bucket 2 out of 2) s
{noformat}




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)