You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "Gopal V (JIRA)" <ji...@apache.org> on 2016/01/08 00:47:39 UTC

[jira] [Updated] (HIVE-12808) Logical PPD: Push filter clauses through PTF into TS

     [ https://issues.apache.org/jira/browse/HIVE-12808?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Gopal V updated HIVE-12808:
---------------------------
    Description: 
Simplified repro case, with the slow query showing the push-down miss. 

And the manually rewritten query to indicate the expected one.

Part of the problem could be the window range not being split apart for PPD, but the FIL is not pushed down even if the rownum filter is removed.

{code}
create temporary table positions (regionid string, id bigint, deviceid string, ts string);

insert into positions values('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-01'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-01'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-02'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-02');


-- slow query
explain
WITH t1 AS 
( 
         SELECT   *, 
                  Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC) AS rownos
         FROM     positions ), 
latestposition as ( 
       SELECT * 
       FROM   t1 
       WHERE  rownos = 1) 
SELECT * 
FROM   latestposition 
WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
AND    id=1422792010 
AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432';

-- fast query
explain
WITH t1 AS 
( 
         SELECT   *, 
                  Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC) AS rownos
         FROM     positions 
         WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
         AND    id=1422792010 
         AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432'
),latestposition as ( 
       SELECT * 
       FROM   t1 
       WHERE  rownos = 1) 
SELECT * 
FROM   latestposition 
;
{code}

  was:
Simplified repro case, with the slow query showing the push-down miss. 

And the manually rewritten query to indicate the expected one.

Part of the problem could be the window range not being split apart for PPD, but the FIL is not pushed down even if the rownum filter is removed.

{code}
create temporary table positions (regionid string, id bigint, deviceid string, ts string);

insert into positions values('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-01'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-01'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-02'),
('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-02');


-- slow query

WITH t1 AS 
( 
         SELECT   *, 
                  Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC) AS rownos
         FROM     positions ), 
latestposition as ( 
       SELECT * 
       FROM   t1 
       WHERE  rownos = 1) 
SELECT * 
FROM   latestposition 
WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
AND    id=1422792010 
AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432';

-- fast query

WITH t1 AS 
( 
         SELECT   *, 
                  Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC) AS rownos
         FROM     positions 
         WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
         AND    id=1422792010 
         AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432'
),latestposition as ( 
       SELECT * 
       FROM   t1 
       WHERE  rownos = 1) 
SELECT * 
FROM   latestposition 
;
{code}


> Logical PPD: Push filter clauses through PTF into TS
> ----------------------------------------------------
>
>                 Key: HIVE-12808
>                 URL: https://issues.apache.org/jira/browse/HIVE-12808
>             Project: Hive
>          Issue Type: Bug
>          Components: Logical Optimizer
>    Affects Versions: 1.2.1, 2.0.0
>            Reporter: Gopal V
>
> Simplified repro case, with the slow query showing the push-down miss. 
> And the manually rewritten query to indicate the expected one.
> Part of the problem could be the window range not being split apart for PPD, but the FIL is not pushed down even if the rownum filter is removed.
> {code}
> create temporary table positions (regionid string, id bigint, deviceid string, ts string);
> insert into positions values('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-01'),
> ('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-01'),
> ('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-02'),
> ('1d6a0be1-6366-4692-9597-ebd5cd0f01d1', 1422792010, '6c5d1a30-2331-448b-a726-a380d6b3a432', '2016-01-02');
> -- slow query
> explain
> WITH t1 AS 
> ( 
>          SELECT   *, 
>                   Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC) AS rownos
>          FROM     positions ), 
> latestposition as ( 
>        SELECT * 
>        FROM   t1 
>        WHERE  rownos = 1) 
> SELECT * 
> FROM   latestposition 
> WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
> AND    id=1422792010 
> AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432';
> -- fast query
> explain
> WITH t1 AS 
> ( 
>          SELECT   *, 
>                   Row_number() over ( PARTITION BY regionid, id, deviceid ORDER BY ts DESC) AS rownos
>          FROM     positions 
>          WHERE  regionid='1d6a0be1-6366-4692-9597-ebd5cd0f01d1' 
>          AND    id=1422792010 
>          AND    deviceid='6c5d1a30-2331-448b-a726-a380d6b3a432'
> ),latestposition as ( 
>        SELECT * 
>        FROM   t1 
>        WHERE  rownos = 1) 
> SELECT * 
> FROM   latestposition 
> ;
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)