You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Sergey Shelukhin (JIRA)" <ji...@apache.org> on 2017/10/13 02:46:00 UTC
[jira] [Created] (HIVE-17796) PTF in a view disables PPD
Sergey Shelukhin created HIVE-17796:
---------------------------------------
Summary: PTF in a view disables PPD
Key: HIVE-17796
URL: https://issues.apache.org/jira/browse/HIVE-17796
Project: Hive
Issue Type: Bug
Reporter: Sergey Shelukhin
I disabled constant propagation to make the logging cleaner; the behavior is the same when it is enabled. See the truncated path-to-alias entries in the explain output for the following cases:
Simple view with partition columns and filter outside of the view: PPD works.
View with PTF and filter included in the view: PPD works.
View with PTF and filter outside of the view: PPD breaks.
I looked at the logs for some time; it looks like the predicate is already null in this case by the time it is passed to the partition pruner. I am not sure yet why this is happening.
View can also be partitioned.
{noformat}
-- Session configuration for the repro. Note in particular that automatic join
-- conversion is switched on and that CBO is switched off (hive.cbo.enable=false).
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;
set hive.metastore.aggregate.stats.cache.enabled=false;
set hive.stats.fetch.column.stats=false;
set hive.cbo.enable=false;
-- Two tables that share the same partition columns (pc1, pc2).
create table dim (c2 string) partitioned by (pc1 string, pc2 string);
create table fact (c1 string, c3 string) partitioned by (pc1 string, pc2 string);
-- Load partitions from `src`, which is not defined in this script
-- (presumably the standard Hive test table with a `key` column -- confirm).
-- dim receives partitions aaa, ccc and ddd.
insert overwrite table dim partition (pc1='aaa', pc2='aaa') select key from src;
insert overwrite table dim partition (pc1='ccc', pc2='ccc') select key from src;
insert overwrite table dim partition (pc1='ddd', pc2='ddd') select key from src;
-- fact receives partitions aaa, bbb and ccc, so only aaa and ccc exist in both tables.
insert overwrite table fact partition (pc1='aaa', pc2='aaa') select key, key from src;
insert overwrite table fact partition (pc1='bbb', pc2='bbb') select key, key from src;
insert overwrite table fact partition (pc1='ccc', pc2='ccc') select key, key from src;
-- vw_ptf: fact joined to dim on both partition columns, plus a computed column
-- built from a window function (row_number() over the partition columns).
-- There is no filter inside the view.
create view vw_ptf as select a1.*,
(cast((row_number() over (partition by a1.pc1, a1.pc2)) as bigint) + b1.c2) as unique_key
from fact a1 join dim b1 on a1.pc1 = b1.pc1 and a1.pc2 = b1.pc2;
-- vw_simple: the same join, but without any window function.
create view vw_simple as select a1.*, b1.c2
from fact a1 join dim b1 on a1.pc1 = b1.pc1 and a1.pc2 = b1.pc2;
-- vw_ppd: the same join and window expression as vw_ptf, but with the
-- partition filter (pc1 = 'ccc' and pc2 = 'ccc') placed inside the view.
create view vw_ppd as select a1.*,
(cast((row_number() over (partition by a1.pc1, a1.pc2)) as bigint) + b1.c2) as Unique_Key
from fact a1 join dim b1 on a1.pc1 = b1.pc1 and a1.pc2 = b1.pc2
where a1.pc1 = 'ccc' and a1.pc2='ccc';
-- Constant propagation is disabled only to keep the logging cleaner; the report
-- states that the behavior is the same when it is enabled.
set hive.optimize.constant.propagation=false;
-- Each query applies the same partition filter from outside the view.
-- Case 1: simple view, filter outside the view -- reported as: PPD works.
explain extended
select a.* from vw_simple a WHERE 1 = 1 AND (a.pc1 = 'ccc' and a.pc2='ccc');
-- Case 2: view with the window function and the filter also inside the view --
-- reported as: PPD works.
explain extended
select a.* from vw_ppd a WHERE 1 = 1 AND (a.pc1 = 'ccc' and a.pc2='ccc');
-- Case 3: view with the window function, filter only outside the view --
-- reported as: PPD breaks (this is the bug).
explain extended
select a.* from vw_ptf a WHERE 1 = 1 AND (a.pc1 = 'ccc' and a.pc2='ccc');
{noformat}
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)