You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "ASF subversion and git services (JIRA)" <ji...@apache.org> on 2017/08/18 00:29:00 UTC

[jira] [Commented] (ASTERIXDB-2044) Listify in subqueries

    [ https://issues.apache.org/jira/browse/ASTERIXDB-2044?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16131526#comment-16131526 ] 

ASF subversion and git services commented on ASTERIXDB-2044:
------------------------------------------------------------

Commit 5170fb212aa0e2c1e93327553fc38b6acd22c73b in asterixdb's branch refs/heads/master from [~buyingyi]
[ https://git-wip-us.apache.org/repos/asf?p=asterixdb.git;h=5170fb2 ]

[ASTERIXDB-2044][COMP] Eliminate listify for complex group-by

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Fix EliminateSubplanWithInputCardinalityOneRule to handle recursive
  subplans;
- Fix various places that assume the nested plans inside a group-by
  operator cannot be empty;
- Added regression tests.

Change-Id: Ida9aa8d89a89f90256e54c8c1806af9b4a162d21
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1946
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Yingyi Bu <bu...@gmail.com>


> Listify in subqueries
> ---------------------
>
>                 Key: ASTERIXDB-2044
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-2044
>             Project: Apache AsterixDB
>          Issue Type: Bug
>          Components: COMP - Compiler
>            Reporter: Yingyi Bu
>            Assignee: Yingyi Bu
>
> The following query will result in unnecessary listifies in the optimized query plan.
> {noformat}
> DROP  DATAVERSE tpch IF EXISTS;
> CREATE  dataverse tpch;
> USE tpch;
> CREATE TYPE LineItemType AS CLOSED {
>   l_orderkey : integer,
>   l_partkey : integer,
>   l_suppkey : integer,
>   l_linenumber : integer,
>   l_quantity : double,
>   l_extendedprice : double,
>   l_discount : double,
>   l_tax : double,
>   l_returnflag : string,
>   l_linestatus : string,
>   l_shipdate : string,
>   l_commitdate : string,
>   l_receiptdate : string,
>   l_shipinstruct : string,
>   l_shipmode : string,
>   l_comment : string
> }
> CREATE DATASET LineItem(LineItemType) PRIMARY KEY l_orderkey,l_linenumber;
> SELECT l_returnflag AS l_returnflag,
>        l_linestatus AS l_linestatus,
>        coll_count(cheap) AS count_cheaps,
>        coll_count(expensive) AS count_expensives
> FROM LineItem AS l
> /* +hash */
> GROUP BY l.l_returnflag AS l_returnflag,l.l_linestatus AS l_linestatus
> GROUP AS g
> LET cheap = (
>       SELECT ELEMENT m
>       FROM (FROM g SELECT VALUE l) AS m
>       WHERE m.l_discount > 0.05
> ),
> expensive = (
>       SELECT ELEMENT m
>       FROM (FROM g SELECT VALUE l) AS m
>       WHERE m.l_discount <= 0.05
> )
> ORDER BY l_returnflag,l_linestatus
> ;
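> {noformat}
> The optimized query plan is shown below; note the listify aggregate inside each of the two nested subplans.
> {noformat}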
> distribute result [$$31]
> -- DISTRIBUTE_RESULT  |PARTITIONED|
>   exchange
>   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>     project ([$$31])
>     -- STREAM_PROJECT  |PARTITIONED|
>       assign [$$31] <- [{"l_returnflag": $$l_returnflag, "l_linestatus": $$l_linestatus, "count_cheaps": $$36, "count_expensives": $$37}]
>       -- ASSIGN  |PARTITIONED|
>         exchange
>         -- SORT_MERGE_EXCHANGE [$$l_returnflag(ASC), $$l_linestatus(ASC) ]  |PARTITIONED|
>           project ([$$l_returnflag, $$l_linestatus, $$36, $$37])
>           -- STREAM_PROJECT  |PARTITIONED|
>             subplan {
>                       aggregate [$$37] <- [agg-count($$m)]
>                       -- AGGREGATE  |LOCAL|
>                         select (le($$39, 0.05))
>                         -- STREAM_SELECT  |LOCAL|
>                           assign [$$39] <- [$$m.getField(6)]
>                           -- ASSIGN  |LOCAL|
>                             unnest $$m <- scan-collection($$24)
>                             -- UNNEST  |LOCAL|
>                               subplan {
>                                         aggregate [$$24] <- [listify($$23)]
>                                         -- AGGREGATE  |LOCAL|
>                                           assign [$$23] <- [$$g.getField(0)]
>                                           -- ASSIGN  |LOCAL|
>                                             unnest $$g <- scan-collection($$15)
>                                             -- UNNEST  |LOCAL|
>                                               nested tuple source
>                                               -- NESTED_TUPLE_SOURCE  |LOCAL|
>                                      }
>                               -- SUBPLAN  |LOCAL|
>                                 nested tuple source
>                                 -- NESTED_TUPLE_SOURCE  |LOCAL|
>                    }
>             -- SUBPLAN  |PARTITIONED|
>               subplan {
>                         aggregate [$$36] <- [agg-count($$m)]
>                         -- AGGREGATE  |LOCAL|
>                           select (gt($$38, 0.05))
>                           -- STREAM_SELECT  |LOCAL|
>                             assign [$$38] <- [$$m.getField(6)]
>                             -- ASSIGN  |LOCAL|
>                               unnest $$m <- scan-collection($$18)
>                               -- UNNEST  |LOCAL|
>                                 subplan {
>                                           aggregate [$$18] <- [listify($$17)]
>                                           -- AGGREGATE  |LOCAL|
>                                             assign [$$17] <- [$$g.getField(0)]
>                                             -- ASSIGN  |LOCAL|
>                                               unnest $$g <- scan-collection($$15)
>                                               -- UNNEST  |LOCAL|
>                                                 nested tuple source
>                                                 -- NESTED_TUPLE_SOURCE  |LOCAL|
>                                        }
>                                 -- SUBPLAN  |LOCAL|
>                                   nested tuple source
>                                   -- NESTED_TUPLE_SOURCE  |LOCAL|
>                      }
>               -- SUBPLAN  |PARTITIONED|
>                 exchange
>                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                   group by ([$$l_returnflag := $$32; $$l_linestatus := $$33]) decor ([]) {
>                             aggregate [$$15] <- [listify($$g)]
>                             -- AGGREGATE  |LOCAL|
>                               nested tuple source
>                               -- NESTED_TUPLE_SOURCE  |LOCAL|
>                          }
>                   -- PRE_CLUSTERED_GROUP_BY[$$32, $$33]  |PARTITIONED|
>                     exchange
>                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                       order (ASC, $$32) (ASC, $$33) 
>                       -- STABLE_SORT [$$32(ASC), $$33(ASC)]  |PARTITIONED|
>                         exchange
>                         -- HASH_PARTITION_EXCHANGE [$$32, $$33]  |PARTITIONED|
>                           project ([$$32, $$33, $$g])
>                           -- STREAM_PROJECT  |PARTITIONED|
>                             assign [$$g, $$33, $$32] <- [{"l": $$l}, $$l.getField(9), $$l.getField(8)]
>                             -- ASSIGN  |PARTITIONED|
>                               project ([$$l])
>                               -- STREAM_PROJECT  |PARTITIONED|
>                                 exchange
>                                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                   data-scan []<-[$$34, $$35, $$l] <- tpch.LineItem
>                                   -- DATASOURCE_SCAN  |PARTITIONED|
>                                     exchange
>                                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                       empty-tuple-source
>                                       -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)