You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "daveKim (JIRA)" <ji...@apache.org> on 2018/12/21 05:58:00 UTC

[jira] [Updated] (HIVE-21054) union all and join

     [ https://issues.apache.org/jira/browse/HIVE-21054?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

daveKim updated HIVE-21054:
---------------------------
    Description: 
 i make 3 tables 

 
{code:java}
// 코드 자리 표시자
CREATE TABLE `testdb`.`tab` ( 
stn1 STRING COMMENT '' ,
int2 BIGINT COMMENT '' ,
float3 FLOAT COMMENT '' ,
long4 STRING COMMENT '' ,
double5 DOUBLE COMMENT '' ,
boolean6 BOOLEAN COMMENT '' ,
timestamp7 TIMESTAMP COMMENT '' ,
words8 STRING COMMENT '' ,
email9 STRING COMMENT '' ,
time10 STRING COMMENT '' ,
int11 BIGINT COMMENT '' ,
float12 FLOAT COMMENT '' ,
long13 STRING COMMENT '' ,
double14 DOUBLE COMMENT '' ,
boolean15 BOOLEAN COMMENT '' ,
timestamp16 TIMESTAMP COMMENT '' ,
email17 STRING COMMENT '' ,
time18 STRING COMMENT '' ,
list19 STRING COMMENT '' ,
id20 STRING COMMENT '' ,
sqltime21 TIMESTAMP COMMENT '' ,
ctype22 INT COMMENT '' ,
reg23 STRING COMMENT '' 
)

CREATE TABLE `testdb`.`space` ( 
stn1 STRING COMMENT '' ,
int2 BIGINT COMMENT '' ,
float3 FLOAT COMMENT '' ,
long4 STRING COMMENT '' ,
double5 DOUBLE COMMENT '' ,
boolean6 BOOLEAN COMMENT '' ,
timestamp7 TIMESTAMP COMMENT '' ,
words8 STRING COMMENT '' ,
email9 STRING COMMENT '' ,
time10 STRING COMMENT '' ,
int11 BIGINT COMMENT '' ,
float12 FLOAT COMMENT '' ,
long13 STRING COMMENT '' ,
double14 DOUBLE COMMENT '' ,
boolean15 BOOLEAN COMMENT '' ,
timestamp16 TIMESTAMP COMMENT '' ,
email17 STRING COMMENT '' ,
time18 STRING COMMENT '' ,
list19 STRING COMMENT '' ,
id20 STRING COMMENT '' ,
sqltime21 TIMESTAMP COMMENT '' ,
ctype22 INT COMMENT '' ,
reg23 STRING COMMENT '' 
)

CREATE TABLE `testdb`.`colon` ( 
stn1 STRING COMMENT '' ,
int2 BIGINT COMMENT '' ,
float3 FLOAT COMMENT '' ,
long4 STRING COMMENT '' ,
double5 DOUBLE COMMENT '' ,
boolean6 BOOLEAN COMMENT '' ,
timestamp7 TIMESTAMP COMMENT '' ,
words8 STRING COMMENT '' ,
email9 STRING COMMENT '' ,
time10 STRING COMMENT '' ,
int11 BIGINT COMMENT '' ,
float12 FLOAT COMMENT '' ,
long13 STRING COMMENT '' ,
double14 DOUBLE COMMENT '' ,
boolean15 BOOLEAN COMMENT '' ,
timestamp16 TIMESTAMP COMMENT '' ,
email17 STRING COMMENT '' ,
time18 STRING COMMENT '' ,
list19 STRING COMMENT '' ,
id20 STRING COMMENT '' ,
sqltime21 TIMESTAMP COMMENT '' ,
ctype22 INT COMMENT '' ,
reg23 STRING COMMENT '' 
)
{code}
 

 

and execute query at below : 
{code:java}
// 
set hive.vectorized.execution.enabled=FALSE;

SELECT distinct t1.ctype, t1.id_all , t2.list19
FROM (
SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
FROM tab WHERE stn1='20130101010100'
AND ctype22 BETWEEN 2 AND 5   --result 45 row

UNION ALL 

SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
FROM space 
WHERE stn1='20130102010100'
AND ctype22 BETWEEN 2 AND 5   --result empty

) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows
{code}
expected result 
||ctype||id_all||list19||
|2|104|ITEM30|
|2|683|ITEM30|
|1|970|ITEM30|

but, actual result empty..

 

 

instead make table "union all" query result
{code:java}
// 코드 자리 표시자
set hive.vectorized.execution.enabled=FALSE;

create table unionalltbl as
SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list 
FROM tab WHERE stn1='20130101010100'
AND ctype22 BETWEEN 2 AND 5 

UNION ALL 

SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list 
FROM space 
WHERE stn1='20130102010100'
AND ctype22 BETWEEN 2 AND 5
{code}
 

and retry query 
{code:java}
// 코드 자리 표시자
set hive.vectorized.execution.enabled=FALSE;

select DISTINCT t1.close_type, t1.id_all, t2.list19
from unionalltbl t1 JOIN colon t2 ON t1.id_all=t2.id20 and t2.list19='ITEM30'
{code}
i got expected result 

 

and i compare two query explain
first query
{code:java}
// 코드 자리 표시자
rel#18659:HiveProject.HIVE.[](input=rel#18657:HiveAggregate.HIVE.[]
(
input=rel#18655:HiveProject.HIVE.[]
(
input=rel#18653:HiveJoin.HIVE.[]
(
left=rel#18650:HiveProject.HIVE.[]
(
input=rel#18648:HiveUnion.HIVE.[]
(
input#0=rel#18641:HiveProject.HIVE.[]
(
input=rel#18639:HiveFilter.HIVE.[]
(
input=rel#18611:HiveTableScan.HIVE.[](table=[testdb.tab],table:alias=tab)[false]
,condition=AND(=($0, _UTF-16LE'20130101010100'), BETWEEN(false, $21, 2, 5))
)
,close_type=$21,id_all=$19,item_list=$18
)
,input#1=rel#18646:HiveProject.HIVE.[]
( 
input=rel#18644:HiveFilter.HIVE.[]
(
input=rel#18614:HiveTableScan.HIVE.[](table=[testdb.space],table:alias=space)[false]
,condition=AND(=($0, _UTF-16LE'20130102010100'), BETWEEN(false, $21, 2, 5))
),close_type=$21,id_all=$19,item_list=$18
)
,all=true
)
,close_type=$0,id_all=$1,item_list=$2
),
right=rel#18619:HiveTableScan.HIVE.[]
(
table=[testdb.colon],table:alias=t2
)
[false],condition=AND
(
=($1, $22), =($21, _UTF-16LE'ITEM30')
)
,joinType=inner,algorithm=none,cost=not available
)
,$f0=$0,$f1=$1,$f2=$21
)
,group={0, 1, 2}
)
,close_type=$0,id_all=$1,list19=$2)
{code}
second query
{code:java}
// 코드 자리 표시자
rel#17893:HiveProject.HIVE.[]
(
input=rel#17891:HiveAggregate.HIVE.[]
(
input=rel#17889:HiveProject.HIVE.[]
(
input=rel#17887:HiveJoin.HIVE.[]
(
left=rel#17872:HiveTableScan.HIVE.[]
(able=[testdb.unionalltbl],table:alias=t1)[false],

right=rel#17873:HiveTableScan.HIVE.[](table=[testdb.colon],table:alias=t2)[false],
condition=AND(=($1, $25), =($24, _UTF-16LE'ITEM30')),
joinType=inner,algorithm=none,cost=not available
),
$f0=$0,$f1=$1,$f2=$24
),
group={0, 1, 2}
)
,close_type=$0,id_all=$1,list19=$2
)
{code}
why right table 'colon' condition column number different? 

 

  was:
 

 
{code:java}
// 
set hive.vectorized.execution.enabled=FALSE;
set hive.optimize.union.remove=FALSE;


SELECT distinct t1.ctype, t1.id_all , t2.list19
FROM (
SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
FROM tab WHERE stn1='20130101010100'
AND ctype22 BETWEEN 2 AND 5   --result 45 row

UNION ALL 

SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
FROM space 
WHERE stn1='20130102010100'
AND ctype22 BETWEEN 2 AND 5   --result empty

) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows
{code}
expected result 
||ctype||id_all||list19||
|2|104|ITEM30|
|2|683|ITEM30|
|1|970|ITEM30|

but, actual result empty..

 

instead "union all" below query result is not empty case are work.
{code:java}
// 코드 자리 표시자
set hive.vectorized.execution.enabled=FALSE;
set hive.optimize.union.remove=FALSE;


SELECT distinct t1.ctype, t1.id_all , t2.list19
FROM (
SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
FROM tab WHERE stn1='20130101010100'
AND ctype22 BETWEEN 2 AND 5   --result 45 row

UNION ALL 

SELECT 1 AS ctype, '111' AS id_all, 'ITEM30' AS item_list --dummy row

) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows
{code}
 

am i wrong properties or query?

 

 

reference
 # HIVE-12788
 # HIVE-20319

 

 

 

        Summary: union all and join   (was: union all query result empty)

> union all and join 
> -------------------
>
>                 Key: HIVE-21054
>                 URL: https://issues.apache.org/jira/browse/HIVE-21054
>             Project: Hive
>          Issue Type: Bug
>          Components: Query Planning
>    Affects Versions: 3.1.1
>            Reporter: daveKim
>            Priority: Major
>
>  i make 3 tables 
>  
> {code:java}
> // 코드 자리 표시자
> CREATE TABLE `testdb`.`tab` ( 
> stn1 STRING COMMENT '' ,
> int2 BIGINT COMMENT '' ,
> float3 FLOAT COMMENT '' ,
> long4 STRING COMMENT '' ,
> double5 DOUBLE COMMENT '' ,
> boolean6 BOOLEAN COMMENT '' ,
> timestamp7 TIMESTAMP COMMENT '' ,
> words8 STRING COMMENT '' ,
> email9 STRING COMMENT '' ,
> time10 STRING COMMENT '' ,
> int11 BIGINT COMMENT '' ,
> float12 FLOAT COMMENT '' ,
> long13 STRING COMMENT '' ,
> double14 DOUBLE COMMENT '' ,
> boolean15 BOOLEAN COMMENT '' ,
> timestamp16 TIMESTAMP COMMENT '' ,
> email17 STRING COMMENT '' ,
> time18 STRING COMMENT '' ,
> list19 STRING COMMENT '' ,
> id20 STRING COMMENT '' ,
> sqltime21 TIMESTAMP COMMENT '' ,
> ctype22 INT COMMENT '' ,
> reg23 STRING COMMENT '' 
> )
> CREATE TABLE `testdb`.`space` ( 
> stn1 STRING COMMENT '' ,
> int2 BIGINT COMMENT '' ,
> float3 FLOAT COMMENT '' ,
> long4 STRING COMMENT '' ,
> double5 DOUBLE COMMENT '' ,
> boolean6 BOOLEAN COMMENT '' ,
> timestamp7 TIMESTAMP COMMENT '' ,
> words8 STRING COMMENT '' ,
> email9 STRING COMMENT '' ,
> time10 STRING COMMENT '' ,
> int11 BIGINT COMMENT '' ,
> float12 FLOAT COMMENT '' ,
> long13 STRING COMMENT '' ,
> double14 DOUBLE COMMENT '' ,
> boolean15 BOOLEAN COMMENT '' ,
> timestamp16 TIMESTAMP COMMENT '' ,
> email17 STRING COMMENT '' ,
> time18 STRING COMMENT '' ,
> list19 STRING COMMENT '' ,
> id20 STRING COMMENT '' ,
> sqltime21 TIMESTAMP COMMENT '' ,
> ctype22 INT COMMENT '' ,
> reg23 STRING COMMENT '' 
> )
> CREATE TABLE `testdb`.`colon` ( 
> stn1 STRING COMMENT '' ,
> int2 BIGINT COMMENT '' ,
> float3 FLOAT COMMENT '' ,
> long4 STRING COMMENT '' ,
> double5 DOUBLE COMMENT '' ,
> boolean6 BOOLEAN COMMENT '' ,
> timestamp7 TIMESTAMP COMMENT '' ,
> words8 STRING COMMENT '' ,
> email9 STRING COMMENT '' ,
> time10 STRING COMMENT '' ,
> int11 BIGINT COMMENT '' ,
> float12 FLOAT COMMENT '' ,
> long13 STRING COMMENT '' ,
> double14 DOUBLE COMMENT '' ,
> boolean15 BOOLEAN COMMENT '' ,
> timestamp16 TIMESTAMP COMMENT '' ,
> email17 STRING COMMENT '' ,
> time18 STRING COMMENT '' ,
> list19 STRING COMMENT '' ,
> id20 STRING COMMENT '' ,
> sqltime21 TIMESTAMP COMMENT '' ,
> ctype22 INT COMMENT '' ,
> reg23 STRING COMMENT '' 
> )
> {code}
>  
>  
> and execute query at below : 
> {code:java}
> // 
> set hive.vectorized.execution.enabled=FALSE;
> SELECT distinct t1.ctype, t1.id_all , t2.list19
> FROM (
> SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
> FROM tab WHERE stn1='20130101010100'
> AND ctype22 BETWEEN 2 AND 5   --result 45 row
> UNION ALL 
> SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list 
> FROM space 
> WHERE stn1='20130102010100'
> AND ctype22 BETWEEN 2 AND 5   --result empty
> ) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows
> {code}
> expected result 
> ||ctype||id_all||list19||
> |2|104|ITEM30|
> |2|683|ITEM30|
> |1|970|ITEM30|
> but, actual result empty..
>  
>  
> instead make table "union all" query result
> {code:java}
> // 코드 자리 표시자
> set hive.vectorized.execution.enabled=FALSE;
> create table unionalltbl as
> SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list 
> FROM tab WHERE stn1='20130101010100'
> AND ctype22 BETWEEN 2 AND 5 
> UNION ALL 
> SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list 
> FROM space 
> WHERE stn1='20130102010100'
> AND ctype22 BETWEEN 2 AND 5
> {code}
>  
> and retry query 
> {code:java}
> // 코드 자리 표시자
> set hive.vectorized.execution.enabled=FALSE;
> select DISTINCT t1.close_type, t1.id_all, t2.list19
> from unionalltbl t1 JOIN colon t2 ON t1.id_all=t2.id20 and t2.list19='ITEM30'
> {code}
> i got expected result 
>  
> and i compare two query explain
> first query
> {code:java}
> // 코드 자리 표시자
> rel#18659:HiveProject.HIVE.[](input=rel#18657:HiveAggregate.HIVE.[]
> (
> input=rel#18655:HiveProject.HIVE.[]
> (
> input=rel#18653:HiveJoin.HIVE.[]
> (
> left=rel#18650:HiveProject.HIVE.[]
> (
> input=rel#18648:HiveUnion.HIVE.[]
> (
> input#0=rel#18641:HiveProject.HIVE.[]
> (
> input=rel#18639:HiveFilter.HIVE.[]
> (
> input=rel#18611:HiveTableScan.HIVE.[](table=[testdb.tab],table:alias=tab)[false]
> ,condition=AND(=($0, _UTF-16LE'20130101010100'), BETWEEN(false, $21, 2, 5))
> )
> ,close_type=$21,id_all=$19,item_list=$18
> )
> ,input#1=rel#18646:HiveProject.HIVE.[]
> ( 
> input=rel#18644:HiveFilter.HIVE.[]
> (
> input=rel#18614:HiveTableScan.HIVE.[](table=[testdb.space],table:alias=space)[false]
> ,condition=AND(=($0, _UTF-16LE'20130102010100'), BETWEEN(false, $21, 2, 5))
> ),close_type=$21,id_all=$19,item_list=$18
> )
> ,all=true
> )
> ,close_type=$0,id_all=$1,item_list=$2
> ),
> right=rel#18619:HiveTableScan.HIVE.[]
> (
> table=[testdb.colon],table:alias=t2
> )
> [false],condition=AND
> (
> =($1, $22), =($21, _UTF-16LE'ITEM30')
> )
> ,joinType=inner,algorithm=none,cost=not available
> )
> ,$f0=$0,$f1=$1,$f2=$21
> )
> ,group={0, 1, 2}
> )
> ,close_type=$0,id_all=$1,list19=$2)
> {code}
> second query
> {code:java}
> // 코드 자리 표시자
> rel#17893:HiveProject.HIVE.[]
> (
> input=rel#17891:HiveAggregate.HIVE.[]
> (
> input=rel#17889:HiveProject.HIVE.[]
> (
> input=rel#17887:HiveJoin.HIVE.[]
> (
> left=rel#17872:HiveTableScan.HIVE.[]
> (able=[testdb.unionalltbl],table:alias=t1)[false],
> right=rel#17873:HiveTableScan.HIVE.[](table=[testdb.colon],table:alias=t2)[false],
> condition=AND(=($1, $25), =($24, _UTF-16LE'ITEM30')),
> joinType=inner,algorithm=none,cost=not available
> ),
> $f0=$0,$f1=$1,$f2=$24
> ),
> group={0, 1, 2}
> )
> ,close_type=$0,id_all=$1,list19=$2
> )
> {code}
> why right table 'colon' condition column number different? 
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)