You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Chris Kudelka (JIRA)" <ji...@apache.org> on 2014/09/12 05:57:34 UTC

[jira] [Updated] (HIVE-8068) Dynamic partition insert overwrite does not overwrite files, but instead appends.

     [ https://issues.apache.org/jira/browse/HIVE-8068?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Chris Kudelka updated HIVE-8068:
--------------------------------
    Description: 
-- using a reference table `one_row` with contents:
||dummy_field||
|dummy_value|

-- create test table
create table if not exists test_table (
  line string
) partitioned by (
  my_part string
)

-- run first time
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]

-- run again
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=2, numRows=1, totalSize=20, rawDataSize=1]

select * from test_table where my_part = 'partVal';

Expected result:
1 row

Actual result:
2 rows

----
-- compare to non-dynamic partition insert, which overwrites as expected
----

-- drop table and recreate with same definition
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]

-- run again
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
select * from test_table where my_part = 'partVal';

Expected result:
1 row

Actual result:
1 row

  was:
-- using a reference table `one_row` with contents:
|dummy_field|
 dummy_value

-- create test table
create table if not exists test_table (
  line string
) partitioned by (
  my_part string
)

-- run first time
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]

-- run again
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=2, numRows=1, totalSize=20, rawDataSize=1]

select * from test_table where my_part = 'partVal';

Expected result:
1 row

Actual result:
2 rows

----
-- compare to non-dynamic partition insert, which overwrites as expected
----

-- drop table and recreate with sme definition
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]

-- run again
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
select * from test_table where my_part = 'partVal';

Expected result:
1 row

Actual result:
1 row


> Dynamic partition insert overwrite does not overwrite files, but instead appends.
> ---------------------------------------------------------------------------------
>
>                 Key: HIVE-8068
>                 URL: https://issues.apache.org/jira/browse/HIVE-8068
>             Project: Hive
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 0.13.1
>         Environment: Centos 6.2; Amazon S3 as DFS
>            Reporter: Chris Kudelka
>
> -- using a reference table `one_row` with contents:
> ||dummy_field||
> |dummy_value|
> -- create test table
> create table if not exists test_table (
>   line string
> ) partitioned by (
>   my_part string
> )
> -- run first time
> insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
> -- run again
> insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=2, numRows=1, totalSize=20, rawDataSize=1]
> select * from test_table where my_part = 'partVal';
> Expected result:
> 1 row
> Actual result:
> 2 rows
> ----
> -- compare to non-dynamic partition insert, which overwrites as expected
> ----
> -- drop table and recreate with same definition
> insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
> -- run again
> insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
> select * from test_table where my_part = 'partVal';
> Expected result:
> 1 row
> Actual result:
> 1 row



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)