You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "daveKim (JIRA)" <ji...@apache.org> on 2018/12/21 05:58:00 UTC
[jira] [Updated] (HIVE-21054) union all and join
[ https://issues.apache.org/jira/browse/HIVE-21054?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
daveKim updated HIVE-21054:
---------------------------
Description:
i make 3 tables
{code:java}
// 코드 자리 표시자
CREATE TABLE `testdb`.`tab` (
stn1 STRING COMMENT '' ,
int2 BIGINT COMMENT '' ,
float3 FLOAT COMMENT '' ,
long4 STRING COMMENT '' ,
double5 DOUBLE COMMENT '' ,
boolean6 BOOLEAN COMMENT '' ,
timestamp7 TIMESTAMP COMMENT '' ,
words8 STRING COMMENT '' ,
email9 STRING COMMENT '' ,
time10 STRING COMMENT '' ,
int11 BIGINT COMMENT '' ,
float12 FLOAT COMMENT '' ,
long13 STRING COMMENT '' ,
double14 DOUBLE COMMENT '' ,
boolean15 BOOLEAN COMMENT '' ,
timestamp16 TIMESTAMP COMMENT '' ,
email17 STRING COMMENT '' ,
time18 STRING COMMENT '' ,
list19 STRING COMMENT '' ,
id20 STRING COMMENT '' ,
sqltime21 TIMESTAMP COMMENT '' ,
ctype22 INT COMMENT '' ,
reg23 STRING COMMENT ''
)
CREATE TABLE `testdb`.`space` (
stn1 STRING COMMENT '' ,
int2 BIGINT COMMENT '' ,
float3 FLOAT COMMENT '' ,
long4 STRING COMMENT '' ,
double5 DOUBLE COMMENT '' ,
boolean6 BOOLEAN COMMENT '' ,
timestamp7 TIMESTAMP COMMENT '' ,
words8 STRING COMMENT '' ,
email9 STRING COMMENT '' ,
time10 STRING COMMENT '' ,
int11 BIGINT COMMENT '' ,
float12 FLOAT COMMENT '' ,
long13 STRING COMMENT '' ,
double14 DOUBLE COMMENT '' ,
boolean15 BOOLEAN COMMENT '' ,
timestamp16 TIMESTAMP COMMENT '' ,
email17 STRING COMMENT '' ,
time18 STRING COMMENT '' ,
list19 STRING COMMENT '' ,
id20 STRING COMMENT '' ,
sqltime21 TIMESTAMP COMMENT '' ,
ctype22 INT COMMENT '' ,
reg23 STRING COMMENT ''
)
CREATE TABLE `testdb`.`colon` (
stn1 STRING COMMENT '' ,
int2 BIGINT COMMENT '' ,
float3 FLOAT COMMENT '' ,
long4 STRING COMMENT '' ,
double5 DOUBLE COMMENT '' ,
boolean6 BOOLEAN COMMENT '' ,
timestamp7 TIMESTAMP COMMENT '' ,
words8 STRING COMMENT '' ,
email9 STRING COMMENT '' ,
time10 STRING COMMENT '' ,
int11 BIGINT COMMENT '' ,
float12 FLOAT COMMENT '' ,
long13 STRING COMMENT '' ,
double14 DOUBLE COMMENT '' ,
boolean15 BOOLEAN COMMENT '' ,
timestamp16 TIMESTAMP COMMENT '' ,
email17 STRING COMMENT '' ,
time18 STRING COMMENT '' ,
list19 STRING COMMENT '' ,
id20 STRING COMMENT '' ,
sqltime21 TIMESTAMP COMMENT '' ,
ctype22 INT COMMENT '' ,
reg23 STRING COMMENT ''
)
{code}
and execute query at below :
{code:java}
//
set hive.vectorized.execution.enabled=FALSE;
SELECT distinct t1.ctype, t1.id_all , t2.list19
FROM (
SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list
FROM tab WHERE stn1='20130101010100'
AND ctype22 BETWEEN 2 AND 5 --result 45 row
UNION ALL
SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list
FROM space
WHERE stn1='20130102010100'
AND ctype22 BETWEEN 2 AND 5 --result empty
) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows
{code}
expected result
||ctype||id_all||list19||
|2|104|ITEM30|
|2|683|ITEM30|
|1|970|ITEM30|
but, actual result empty..
instead make table "union all" query result
{code:java}
// 코드 자리 표시자
set hive.vectorized.execution.enabled=FALSE;
create table unionalltbl as
SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list
FROM tab WHERE stn1='20130101010100'
AND ctype22 BETWEEN 2 AND 5
UNION ALL
SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list
FROM space
WHERE stn1='20130102010100'
AND ctype22 BETWEEN 2 AND 5
{code}
and retry query
{code:java}
// 코드 자리 표시자
set hive.vectorized.execution.enabled=FALSE;
select DISTINCT t1.close_type, t1.id_all, t2.list19
from unionalltbl t1 JOIN colon t2 ON t1.id_all=t2.id20 and t2.list19='ITEM30'
{code}
i got expected result
and i compare two query explain
first query
{code:java}
// 코드 자리 표시자
rel#18659:HiveProject.HIVE.[](input=rel#18657:HiveAggregate.HIVE.[]
(
input=rel#18655:HiveProject.HIVE.[]
(
input=rel#18653:HiveJoin.HIVE.[]
(
left=rel#18650:HiveProject.HIVE.[]
(
input=rel#18648:HiveUnion.HIVE.[]
(
input#0=rel#18641:HiveProject.HIVE.[]
(
input=rel#18639:HiveFilter.HIVE.[]
(
input=rel#18611:HiveTableScan.HIVE.[](table=[testdb.tab],table:alias=tab)[false]
,condition=AND(=($0, _UTF-16LE'20130101010100'), BETWEEN(false, $21, 2, 5))
)
,close_type=$21,id_all=$19,item_list=$18
)
,input#1=rel#18646:HiveProject.HIVE.[]
(
input=rel#18644:HiveFilter.HIVE.[]
(
input=rel#18614:HiveTableScan.HIVE.[](table=[testdb.space],table:alias=space)[false]
,condition=AND(=($0, _UTF-16LE'20130102010100'), BETWEEN(false, $21, 2, 5))
),close_type=$21,id_all=$19,item_list=$18
)
,all=true
)
,close_type=$0,id_all=$1,item_list=$2
),
right=rel#18619:HiveTableScan.HIVE.[]
(
table=[testdb.colon],table:alias=t2
)
[false],condition=AND
(
=($1, $22), =($21, _UTF-16LE'ITEM30')
)
,joinType=inner,algorithm=none,cost=not available
)
,$f0=$0,$f1=$1,$f2=$21
)
,group={0, 1, 2}
)
,close_type=$0,id_all=$1,list19=$2)
{code}
second query
{code:java}
// 코드 자리 표시자
rel#17893:HiveProject.HIVE.[]
(
input=rel#17891:HiveAggregate.HIVE.[]
(
input=rel#17889:HiveProject.HIVE.[]
(
input=rel#17887:HiveJoin.HIVE.[]
(
left=rel#17872:HiveTableScan.HIVE.[]
(able=[testdb.unionalltbl],table:alias=t1)[false],
right=rel#17873:HiveTableScan.HIVE.[](table=[testdb.colon],table:alias=t2)[false],
condition=AND(=($1, $25), =($24, _UTF-16LE'ITEM30')),
joinType=inner,algorithm=none,cost=not available
),
$f0=$0,$f1=$1,$f2=$24
),
group={0, 1, 2}
)
,close_type=$0,id_all=$1,list19=$2
)
{code}
why right table 'colon' condition column number different?
was:
{code:java}
//
set hive.vectorized.execution.enabled=FALSE;
set hive.optimize.union.remove=FALSE;
SELECT distinct t1.ctype, t1.id_all , t2.list19
FROM (
SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list
FROM tab WHERE stn1='20130101010100'
AND ctype22 BETWEEN 2 AND 5 --result 45 row
UNION ALL
SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list
FROM space
WHERE stn1='20130102010100'
AND ctype22 BETWEEN 2 AND 5 --result empty
) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows
{code}
expected result
||ctype||id_all||list19||
|2|104|ITEM30|
|2|683|ITEM30|
|1|970|ITEM30|
but, actual result empty..
instead "union all" below query result is not empty case are work.
{code:java}
// 코드 자리 표시자
set hive.vectorized.execution.enabled=FALSE;
set hive.optimize.union.remove=FALSE;
SELECT distinct t1.ctype, t1.id_all , t2.list19
FROM (
SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list
FROM tab WHERE stn1='20130101010100'
AND ctype22 BETWEEN 2 AND 5 --result 45 row
UNION ALL
SELECT 1 AS ctype, '111' AS id_all, 'ITEM30' AS item_list --dummy row
) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows
{code}
am i wrong properties or query?
reference
# HIVE-12788
# HIVE-20319
Summary: union all and join (was: union all query result empty)
> union all and join
> -------------------
>
> Key: HIVE-21054
> URL: https://issues.apache.org/jira/browse/HIVE-21054
> Project: Hive
> Issue Type: Bug
> Components: Query Planning
> Affects Versions: 3.1.1
> Reporter: daveKim
> Priority: Major
>
> i make 3 tables
>
> {code:java}
> // 코드 자리 표시자
> CREATE TABLE `testdb`.`tab` (
> stn1 STRING COMMENT '' ,
> int2 BIGINT COMMENT '' ,
> float3 FLOAT COMMENT '' ,
> long4 STRING COMMENT '' ,
> double5 DOUBLE COMMENT '' ,
> boolean6 BOOLEAN COMMENT '' ,
> timestamp7 TIMESTAMP COMMENT '' ,
> words8 STRING COMMENT '' ,
> email9 STRING COMMENT '' ,
> time10 STRING COMMENT '' ,
> int11 BIGINT COMMENT '' ,
> float12 FLOAT COMMENT '' ,
> long13 STRING COMMENT '' ,
> double14 DOUBLE COMMENT '' ,
> boolean15 BOOLEAN COMMENT '' ,
> timestamp16 TIMESTAMP COMMENT '' ,
> email17 STRING COMMENT '' ,
> time18 STRING COMMENT '' ,
> list19 STRING COMMENT '' ,
> id20 STRING COMMENT '' ,
> sqltime21 TIMESTAMP COMMENT '' ,
> ctype22 INT COMMENT '' ,
> reg23 STRING COMMENT ''
> )
> CREATE TABLE `testdb`.`space` (
> stn1 STRING COMMENT '' ,
> int2 BIGINT COMMENT '' ,
> float3 FLOAT COMMENT '' ,
> long4 STRING COMMENT '' ,
> double5 DOUBLE COMMENT '' ,
> boolean6 BOOLEAN COMMENT '' ,
> timestamp7 TIMESTAMP COMMENT '' ,
> words8 STRING COMMENT '' ,
> email9 STRING COMMENT '' ,
> time10 STRING COMMENT '' ,
> int11 BIGINT COMMENT '' ,
> float12 FLOAT COMMENT '' ,
> long13 STRING COMMENT '' ,
> double14 DOUBLE COMMENT '' ,
> boolean15 BOOLEAN COMMENT '' ,
> timestamp16 TIMESTAMP COMMENT '' ,
> email17 STRING COMMENT '' ,
> time18 STRING COMMENT '' ,
> list19 STRING COMMENT '' ,
> id20 STRING COMMENT '' ,
> sqltime21 TIMESTAMP COMMENT '' ,
> ctype22 INT COMMENT '' ,
> reg23 STRING COMMENT ''
> )
> CREATE TABLE `testdb`.`colon` (
> stn1 STRING COMMENT '' ,
> int2 BIGINT COMMENT '' ,
> float3 FLOAT COMMENT '' ,
> long4 STRING COMMENT '' ,
> double5 DOUBLE COMMENT '' ,
> boolean6 BOOLEAN COMMENT '' ,
> timestamp7 TIMESTAMP COMMENT '' ,
> words8 STRING COMMENT '' ,
> email9 STRING COMMENT '' ,
> time10 STRING COMMENT '' ,
> int11 BIGINT COMMENT '' ,
> float12 FLOAT COMMENT '' ,
> long13 STRING COMMENT '' ,
> double14 DOUBLE COMMENT '' ,
> boolean15 BOOLEAN COMMENT '' ,
> timestamp16 TIMESTAMP COMMENT '' ,
> email17 STRING COMMENT '' ,
> time18 STRING COMMENT '' ,
> list19 STRING COMMENT '' ,
> id20 STRING COMMENT '' ,
> sqltime21 TIMESTAMP COMMENT '' ,
> ctype22 INT COMMENT '' ,
> reg23 STRING COMMENT ''
> )
> {code}
>
>
> and execute query at below :
> {code:java}
> //
> set hive.vectorized.execution.enabled=FALSE;
> SELECT distinct t1.ctype, t1.id_all , t2.list19
> FROM (
> SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list
> FROM tab WHERE stn1='20130101010100'
> AND ctype22 BETWEEN 2 AND 5 --result 45 row
> UNION ALL
> SELECT ctype22 AS ctype, id20 AS id_all, list19 AS item_list
> FROM space
> WHERE stn1='20130102010100'
> AND ctype22 BETWEEN 2 AND 5 --result empty
> ) t1 JOIN colon t2 ON (t1.id_all = t2.id20 AND t2.list19 ='ITEM30') -- join matched 3 rows
> {code}
> expected result
> ||ctype||id_all||list19||
> |2|104|ITEM30|
> |2|683|ITEM30|
> |1|970|ITEM30|
> but, actual result empty..
>
>
> instead make table "union all" query result
> {code:java}
> // 코드 자리 표시자
> set hive.vectorized.execution.enabled=FALSE;
> create table unionalltbl as
> SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list
> FROM tab WHERE stn1='20130101010100'
> AND ctype22 BETWEEN 2 AND 5
> UNION ALL
> SELECT ctype22 AS close_type, id20 AS id_all, list19 AS ITEM_list
> FROM space
> WHERE stn1='20130102010100'
> AND ctype22 BETWEEN 2 AND 5
> {code}
>
> and retry query
> {code:java}
> // 코드 자리 표시자
> set hive.vectorized.execution.enabled=FALSE;
> select DISTINCT t1.close_type, t1.id_all, t2.list19
> from unionalltbl t1 JOIN colon t2 ON t1.id_all=t2.id20 and t2.list19='ITEM30'
> {code}
> i got expected result
>
> and i compare two query explain
> first query
> {code:java}
> // 코드 자리 표시자
> rel#18659:HiveProject.HIVE.[](input=rel#18657:HiveAggregate.HIVE.[]
> (
> input=rel#18655:HiveProject.HIVE.[]
> (
> input=rel#18653:HiveJoin.HIVE.[]
> (
> left=rel#18650:HiveProject.HIVE.[]
> (
> input=rel#18648:HiveUnion.HIVE.[]
> (
> input#0=rel#18641:HiveProject.HIVE.[]
> (
> input=rel#18639:HiveFilter.HIVE.[]
> (
> input=rel#18611:HiveTableScan.HIVE.[](table=[testdb.tab],table:alias=tab)[false]
> ,condition=AND(=($0, _UTF-16LE'20130101010100'), BETWEEN(false, $21, 2, 5))
> )
> ,close_type=$21,id_all=$19,item_list=$18
> )
> ,input#1=rel#18646:HiveProject.HIVE.[]
> (
> input=rel#18644:HiveFilter.HIVE.[]
> (
> input=rel#18614:HiveTableScan.HIVE.[](table=[testdb.space],table:alias=space)[false]
> ,condition=AND(=($0, _UTF-16LE'20130102010100'), BETWEEN(false, $21, 2, 5))
> ),close_type=$21,id_all=$19,item_list=$18
> )
> ,all=true
> )
> ,close_type=$0,id_all=$1,item_list=$2
> ),
> right=rel#18619:HiveTableScan.HIVE.[]
> (
> table=[testdb.colon],table:alias=t2
> )
> [false],condition=AND
> (
> =($1, $22), =($21, _UTF-16LE'ITEM30')
> )
> ,joinType=inner,algorithm=none,cost=not available
> )
> ,$f0=$0,$f1=$1,$f2=$21
> )
> ,group={0, 1, 2}
> )
> ,close_type=$0,id_all=$1,list19=$2)
> {code}
> second query
> {code:java}
> // 코드 자리 표시자
> rel#17893:HiveProject.HIVE.[]
> (
> input=rel#17891:HiveAggregate.HIVE.[]
> (
> input=rel#17889:HiveProject.HIVE.[]
> (
> input=rel#17887:HiveJoin.HIVE.[]
> (
> left=rel#17872:HiveTableScan.HIVE.[]
> (able=[testdb.unionalltbl],table:alias=t1)[false],
> right=rel#17873:HiveTableScan.HIVE.[](table=[testdb.colon],table:alias=t2)[false],
> condition=AND(=($1, $25), =($24, _UTF-16LE'ITEM30')),
> joinType=inner,algorithm=none,cost=not available
> ),
> $f0=$0,$f1=$1,$f2=$24
> ),
> group={0, 1, 2}
> )
> ,close_type=$0,id_all=$1,list19=$2
> )
> {code}
> why right table 'colon' condition column number different?
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)