You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Sergey Shelukhin (JIRA)" <ji...@apache.org> on 2017/10/13 02:46:00 UTC
[jira] [Created] (HIVE-17796) PTF in a view disables PPD

Sergey Shelukhin created HIVE-17796:
---------------------------------------

             Summary: PTF in a view disables PPD
                 Key: HIVE-17796
                 URL: https://issues.apache.org/jira/browse/HIVE-17796
             Project: Hive
          Issue Type: Bug
            Reporter: Sergey Shelukhin


I disabled constant propagation to make logging cleaner. It is the same if it is enabled. See truncated path to alias; 
Simple view with partition columns and filter outside of the view: PPD works.
View with PTF and filter included in the view: PPD works.
View with PTF and filter outside of the view: PPD breaks.

I looked at the logs for some time, it looks like the predicate is already null in this case when passed to partition pruner; not sure why this is happening for now.
View can also be partitioned.


{noformat}
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;
set hive.metastore.aggregate.stats.cache.enabled=false;
set hive.stats.fetch.column.stats=false;
set hive.cbo.enable=false;


create table dim (c2 string) partitioned by (pc1 string, pc2 string);
create table fact (c1 string, c3 string) partitioned by (pc1 string, pc2 string);

insert overwrite table dim partition (pc1='aaa', pc2='aaa') select key from src;
insert overwrite table dim partition (pc1='ccc', pc2='ccc') select key from src;
insert overwrite table dim partition (pc1='ddd', pc2='ddd') select key from src;
insert overwrite table fact partition (pc1='aaa', pc2='aaa') select key, key from src;
insert overwrite table fact partition (pc1='bbb', pc2='bbb') select key, key from src;
insert overwrite table fact partition (pc1='ccc', pc2='ccc') select key, key from src;

create view vw_ptf as select a1.*,
(cast((row_number() over (partition by a1.pc1, a1.pc2)) as bigint) + b1.c2) as unique_key
from fact a1 join dim b1 on a1.pc1 = b1.pc1 and a1.pc2 = b1.pc2;

create view vw_simple as select a1.*, b1.c2
from fact a1 join dim b1 on a1.pc1 = b1.pc1 and a1.pc2 = b1.pc2;

create view vw_ppd as select a1.*,
(cast((row_number() over (partition by a1.pc1, a1.pc2)) as bigint) + b1.c2) as Unique_Key
from fact a1 join dim b1 on a1.pc1 = b1.pc1 and a1.pc2 = b1.pc2
where a1.pc1 = 'ccc' and a1.pc2='ccc';

set hive.optimize.constant.propagation=false;


explain extended
select a.* from vw_simple a WHERE 1 = 1 AND (a.pc1 = 'ccc' and a.pc2='ccc'); 
explain extended
select a.* from vw_ppd a WHERE 1 = 1 AND (a.pc1 = 'ccc' and a.pc2='ccc');
explain extended
select a.* from vw_ptf a WHERE 1 = 1 AND (a.pc1 = 'ccc' and a.pc2='ccc');
{noformat}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)