You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "Zoltan Haindrich (JIRA)" <ji...@apache.org> on 2018/05/16 12:24:00 UTC
[jira] [Updated] (HIVE-19570) Multiple inserts using "Group by"
generates incorrect results
[ https://issues.apache.org/jira/browse/HIVE-19570?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zoltan Haindrich updated HIVE-19570:
------------------------------------
Description:
Repro steps:
{code}
drop database if exists ax1 cascade;
create database ax1;
use ax1;
CREATE TABLE
tmp1 (
v1 string , v2 string , v3 string )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
;
INSERT INTO tmp1
VALUES
('a', 'b', 'c1')
, ('a', 'b', 'c2')
, ('d', 'e', 'f')
, ('g', 'h', 'i')
;
CREATE TABLE
tmp_grouped_by_one_col ( v1 string , cnt__v2 int , cnt__v3 int )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
;
CREATE TABLE
tmp_grouped_by_two_col ( v1 string , v2 string , cnt__v3 int )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
;
CREATE TABLE
tmp_grouped_by_all_col ( v1 string , v2 string , v3 string )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
;
FROM tmp1
INSERT INTO tmp_grouped_by_one_col
SELECT v1, count(distinct v2), count(distinct v3)
GROUP BY v1
INSERT INTO tmp_grouped_by_all_col
SELECT v1, v2, v3
GROUP BY v1, v2, v3
;
select 'tmp_grouped_by_one_col',count(*) from tmp_grouped_by_one_col
union all
select 'tmp_grouped_by_two_col',count(*) from tmp_grouped_by_two_col
union all
select 'tmp_grouped_by_all_col',count(*) from tmp_grouped_by_all_col;
select * from tmp_grouped_by_all_col;
{code}
tmp_grouped_by_all_col table should have 4 reocrds but it loads 7 records into the table.
{code}
+----------------------------+----------------------------+----------------------------+--+
| tmp_grouped_by_all_col.v1 | tmp_grouped_by_all_col.v2 | tmp_grouped_by_all_col.v3 |
+----------------------------+----------------------------+----------------------------+--+
| a | b | b |
| a | c1 | c1 |
| a | c2 | c2 |
| d | e | e |
| d | f | f |
| g | h | h |
| g | i | i |
+----------------------------+----------------------------+----------------------------+--+
{code}
was:
Repro steps:
drop database if exists ax1 cascade;
create database ax1;
use ax1;
CREATE TABLE
tmp1 (
v1 string , v2 string , v3 string )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
;
INSERT INTO tmp1
VALUES
('a', 'b', 'c1')
, ('a', 'b', 'c2')
, ('d', 'e', 'f')
, ('g', 'h', 'i')
;
CREATE TABLE
tmp_grouped_by_one_col ( v1 string , cnt__v2 int , cnt__v3 int )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
;
CREATE TABLE
tmp_grouped_by_two_col ( v1 string , v2 string , cnt__v3 int )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
;
CREATE TABLE
tmp_grouped_by_all_col ( v1 string , v2 string , v3 string )
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
;
FROM tmp1
INSERT INTO tmp_grouped_by_one_col
SELECT v1, count(distinct v2), count(distinct v3)
GROUP BY v1
INSERT INTO tmp_grouped_by_all_col
SELECT v1, v2, v3
GROUP BY v1, v2, v3
;
select 'tmp_grouped_by_one_col',count(*) from tmp_grouped_by_one_col
union all
select 'tmp_grouped_by_two_col',count(*) from tmp_grouped_by_two_col
union all
select 'tmp_grouped_by_all_col',count(*) from tmp_grouped_by_all_col;
select * from tmp_grouped_by_all_col;
tmp_grouped_by_all_col table should have 4 reocrds but it loads 7 records into the table.
+----------------------------+----------------------------+----------------------------+--+
| tmp_grouped_by_all_col.v1 | tmp_grouped_by_all_col.v2 | tmp_grouped_by_all_col.v3 |
+----------------------------+----------------------------+----------------------------+--+
| a | b | b |
| a | c1 | c1 |
| a | c2 | c2 |
| d | e | e |
| d | f | f |
| g | h | h |
| g | i | i |
+----------------------------+----------------------------+----------------------------+--+
> Multiple inserts using "Group by" generates incorrect results
> -------------------------------------------------------------
>
> Key: HIVE-19570
> URL: https://issues.apache.org/jira/browse/HIVE-19570
> Project: Hive
> Issue Type: Bug
> Components: Logical Optimizer, Query Processor
> Affects Versions: 1.2.0, 3.0.0
> Reporter: Riju Trivedi
> Priority: Critical
>
> Repro steps:
> {code}
> drop database if exists ax1 cascade;
> create database ax1;
> use ax1;
> CREATE TABLE
> tmp1 (
> v1 string , v2 string , v3 string )
> ROW FORMAT DELIMITED
> FIELDS TERMINATED BY '\t'
> LINES TERMINATED BY '\n'
> ;
> INSERT INTO tmp1
> VALUES
> ('a', 'b', 'c1')
> , ('a', 'b', 'c2')
> , ('d', 'e', 'f')
> , ('g', 'h', 'i')
> ;
> CREATE TABLE
> tmp_grouped_by_one_col ( v1 string , cnt__v2 int , cnt__v3 int )
> ROW FORMAT DELIMITED
> FIELDS TERMINATED BY '\t'
> LINES TERMINATED BY '\n'
> ;
> CREATE TABLE
> tmp_grouped_by_two_col ( v1 string , v2 string , cnt__v3 int )
> ROW FORMAT DELIMITED
> FIELDS TERMINATED BY '\t'
> LINES TERMINATED BY '\n'
> ;
> CREATE TABLE
> tmp_grouped_by_all_col ( v1 string , v2 string , v3 string )
> ROW FORMAT DELIMITED
> FIELDS TERMINATED BY '\t'
> LINES TERMINATED BY '\n'
> ;
> FROM tmp1
> INSERT INTO tmp_grouped_by_one_col
> SELECT v1, count(distinct v2), count(distinct v3)
> GROUP BY v1
> INSERT INTO tmp_grouped_by_all_col
> SELECT v1, v2, v3
> GROUP BY v1, v2, v3
> ;
> select 'tmp_grouped_by_one_col',count(*) from tmp_grouped_by_one_col
> union all
> select 'tmp_grouped_by_two_col',count(*) from tmp_grouped_by_two_col
> union all
> select 'tmp_grouped_by_all_col',count(*) from tmp_grouped_by_all_col;
> select * from tmp_grouped_by_all_col;
> {code}
> tmp_grouped_by_all_col table should have 4 reocrds but it loads 7 records into the table.
> {code}
> +----------------------------+----------------------------+----------------------------+--+
> | tmp_grouped_by_all_col.v1 | tmp_grouped_by_all_col.v2 | tmp_grouped_by_all_col.v3 |
> +----------------------------+----------------------------+----------------------------+--+
> | a | b | b |
> | a | c1 | c1 |
> | a | c2 | c2 |
> | d | e | e |
> | d | f | f |
> | g | h | h |
> | g | i | i |
> +----------------------------+----------------------------+----------------------------+--+
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)