You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Chris Kudelka (JIRA)" <ji...@apache.org> on 2014/09/12 05:57:34 UTC
[jira] [Updated] (HIVE-8068) Dynamic partition insert overwrite
does not overwrite files, but instead appends.
[ https://issues.apache.org/jira/browse/HIVE-8068?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Chris Kudelka updated HIVE-8068:
--------------------------------
Description:
-- using a reference table `one_row` with contents:
||dummy_field||
|dummy_value|
-- create test table
create table if not exists test_table (
line string
) partitioned by (
my_part string
)
-- run first time
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
-- run again
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=2, numRows=1, totalSize=20, rawDataSize=1]
select * from test_table where my_part = 'partVal';
Expected result:
1 row
Actual result:
2 rows
----
-- compare to non-dynamic partition insert, which overwrites as expected
----
-- drop table and recreate with same definition
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
-- run again
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
select * from test_table where my_part = 'partVal';
Expected result:
1 row
Actual result:
1 row
was:
-- using a reference table `one_row` with contents:
|dummy_field|
dummy_value
-- create test table
create table if not exists test_table (
line string
) partitioned by (
my_part string
)
-- run first time
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
-- run again
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=2, numRows=1, totalSize=20, rawDataSize=1]
select * from test_table where my_part = 'partVal';
Expected result:
1 row
Actual result:
2 rows
----
-- compare to non-dynamic partition insert, which overwrites as expected
----
-- drop table and recreate with same definition
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
-- run again
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
select * from test_table where my_part = 'partVal';
Expected result:
1 row
Actual result:
1 row
> Dynamic partition insert overwrite does not overwrite files, but instead appends.
> ---------------------------------------------------------------------------------
>
> Key: HIVE-8068
> URL: https://issues.apache.org/jira/browse/HIVE-8068
> Project: Hive
> Issue Type: Bug
> Components: SQL
> Affects Versions: 0.13.1
> Environment: Centos 6.2; Amazon S3 as DFS
> Reporter: Chris Kudelka
>
> -- using a reference table `one_row` with contents:
> ||dummy_field||
> |dummy_value|
> -- create test table
> create table if not exists test_table (
> line string
> ) partitioned by (
> my_part string
> )
> -- run first time
> insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
> -- run again
> insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=2, numRows=1, totalSize=20, rawDataSize=1]
> select * from test_table where my_part = 'partVal';
> Expected result:
> 1 row
> Actual result:
> 2 rows
> ----
> -- compare to non-dynamic partition insert, which overwrites as expected
> ----
> -- drop table and recreate with same definition
> insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
> -- run again
> insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
> select * from test_table where my_part = 'partVal';
> Expected result:
> 1 row
> Actual result:
> 1 row
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)