You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "ASF subversion and git services (JIRA)" <ji...@apache.org> on 2017/08/18 00:29:00 UTC
[jira] [Commented] (ASTERIXDB-2044) Listify in subqueries
[ https://issues.apache.org/jira/browse/ASTERIXDB-2044?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16131526#comment-16131526 ]
ASF subversion and git services commented on ASTERIXDB-2044:
------------------------------------------------------------
Commit 5170fb212aa0e2c1e93327553fc38b6acd22c73b in asterixdb's branch refs/heads/master from [~buyingyi]
[ https://git-wip-us.apache.org/repos/asf?p=asterixdb.git;h=5170fb2 ]
[ASTERIXDB-2044][COMP] Eliminate listify for complex group-by
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Fix EliminateSubplanWithInputCardinalityOneRule to handle recursive
subplans;
- Fix various places that assume the nested plans inside a group-by
operator cannot be empty;
- Add regression tests.
Change-Id: Ida9aa8d89a89f90256e54c8c1806af9b4a162d21
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1946
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Yingyi Bu <bu...@gmail.com>
> Listify in subqueries
> ---------------------
>
> Key: ASTERIXDB-2044
> URL: https://issues.apache.org/jira/browse/ASTERIXDB-2044
> Project: Apache AsterixDB
> Issue Type: Bug
> Components: COMP - Compiler
> Reporter: Yingyi Bu
> Assignee: Yingyi Bu
>
> The following query will result in unnecessary listifies in the optimized query plan.
> {noformat}
> DROP DATAVERSE tpch IF EXISTS;
> CREATE dataverse tpch;
> USE tpch;
> CREATE TYPE LineItemType AS CLOSED {
> l_orderkey : integer,
> l_partkey : integer,
> l_suppkey : integer,
> l_linenumber : integer,
> l_quantity : double,
> l_extendedprice : double,
> l_discount : double,
> l_tax : double,
> l_returnflag : string,
> l_linestatus : string,
> l_shipdate : string,
> l_commitdate : string,
> l_receiptdate : string,
> l_shipinstruct : string,
> l_shipmode : string,
> l_comment : string
> }
> CREATE DATASET LineItem(LineItemType) PRIMARY KEY l_orderkey,l_linenumber;
> SELECT l_returnflag AS l_returnflag,
> l_linestatus AS l_linestatus,
> coll_count(cheap) AS count_cheaps,
> coll_count(expensive) AS count_expensives
> FROM LineItem AS l
> /* +hash */
> GROUP BY l.l_returnflag AS l_returnflag,l.l_linestatus AS l_linestatus
> GROUP AS g
> LET cheap = (
> SELECT ELEMENT m
> FROM (FROM g SELECT VALUE l) AS m
> WHERE m.l_discount > 0.05
> ),
> expensive = (
> SELECT ELEMENT m
> FROM (FROM g SELECT VALUE l) AS m
> WHERE m.l_discount <= 0.05
> )
> ORDER BY l_returnflag,l_linestatus
> ;
> {noformat}
> The optimized query plan is:
> {noformat}
> distribute result [$$31]
> -- DISTRIBUTE_RESULT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> project ([$$31])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$31] <- [{"l_returnflag": $$l_returnflag, "l_linestatus": $$l_linestatus, "count_cheaps": $$36, "count_expensives": $$37}]
> -- ASSIGN |PARTITIONED|
> exchange
> -- SORT_MERGE_EXCHANGE [$$l_returnflag(ASC), $$l_linestatus(ASC) ] |PARTITIONED|
> project ([$$l_returnflag, $$l_linestatus, $$36, $$37])
> -- STREAM_PROJECT |PARTITIONED|
> subplan {
> aggregate [$$37] <- [agg-count($$m)]
> -- AGGREGATE |LOCAL|
> select (le($$39, 0.05))
> -- STREAM_SELECT |LOCAL|
> assign [$$39] <- [$$m.getField(6)]
> -- ASSIGN |LOCAL|
> unnest $$m <- scan-collection($$24)
> -- UNNEST |LOCAL|
> subplan {
> aggregate [$$24] <- [listify($$23)]
> -- AGGREGATE |LOCAL|
> assign [$$23] <- [$$g.getField(0)]
> -- ASSIGN |LOCAL|
> unnest $$g <- scan-collection($$15)
> -- UNNEST |LOCAL|
> nested tuple source
> -- NESTED_TUPLE_SOURCE |LOCAL|
> }
> -- SUBPLAN |LOCAL|
> nested tuple source
> -- NESTED_TUPLE_SOURCE |LOCAL|
> }
> -- SUBPLAN |PARTITIONED|
> subplan {
> aggregate [$$36] <- [agg-count($$m)]
> -- AGGREGATE |LOCAL|
> select (gt($$38, 0.05))
> -- STREAM_SELECT |LOCAL|
> assign [$$38] <- [$$m.getField(6)]
> -- ASSIGN |LOCAL|
> unnest $$m <- scan-collection($$18)
> -- UNNEST |LOCAL|
> subplan {
> aggregate [$$18] <- [listify($$17)]
> -- AGGREGATE |LOCAL|
> assign [$$17] <- [$$g.getField(0)]
> -- ASSIGN |LOCAL|
> unnest $$g <- scan-collection($$15)
> -- UNNEST |LOCAL|
> nested tuple source
> -- NESTED_TUPLE_SOURCE |LOCAL|
> }
> -- SUBPLAN |LOCAL|
> nested tuple source
> -- NESTED_TUPLE_SOURCE |LOCAL|
> }
> -- SUBPLAN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> group by ([$$l_returnflag := $$32; $$l_linestatus := $$33]) decor ([]) {
> aggregate [$$15] <- [listify($$g)]
> -- AGGREGATE |LOCAL|
> nested tuple source
> -- NESTED_TUPLE_SOURCE |LOCAL|
> }
> -- PRE_CLUSTERED_GROUP_BY[$$32, $$33] |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> order (ASC, $$32) (ASC, $$33)
> -- STABLE_SORT [$$32(ASC), $$33(ASC)] |PARTITIONED|
> exchange
> -- HASH_PARTITION_EXCHANGE [$$32, $$33] |PARTITIONED|
> project ([$$32, $$33, $$g])
> -- STREAM_PROJECT |PARTITIONED|
> assign [$$g, $$33, $$32] <- [{"l": $$l}, $$l.getField(9), $$l.getField(8)]
> -- ASSIGN |PARTITIONED|
> project ([$$l])
> -- STREAM_PROJECT |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> data-scan []<-[$$34, $$35, $$l] <- tpch.LineItem
> -- DATASOURCE_SCAN |PARTITIONED|
> exchange
> -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
> empty-tuple-source
> -- EMPTY_TUPLE_SOURCE |PARTITIONED|
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)