You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@asterixdb.apache.org by "Gift Sinthong (JIRA)" <ji...@apache.org> on 2018/11/15 00:22:01 UTC

[jira] [Updated] (ASTERIXDB-2481) Out of Memory error doing aggregation

     [ https://issues.apache.org/jira/browse/ASTERIXDB-2481?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Gift Sinthong updated ASTERIXDB-2481:
-------------------------------------
    Description: 
This is the schema:

CREATE TYPE Test AS open {
 unique2: int64
};

CREATE DATASET wisconsin_5gb(Test)
 PRIMARY KEY unique2;

This is the query:
 SELECT min( t.oddOnePercent) as min, max(t.oddOnePercent) as max, count(distinct t.oddOnePercent) as cnt
 FROM wisconsin_5gb t ;

 

The plan for this query:
 distribute result [$$46]
 -- DISTRIBUTE_RESULT |UNPARTITIONED|
 exchange
 -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
 project ([$$46])
 -- STREAM_PROJECT |UNPARTITIONED|
 assign [$$46] <- [{"min": $$48, "max": $$49, "cnt": $$50}]
 -- ASSIGN |UNPARTITIONED|
 project ([$$48, $$49, $$50])
 -- STREAM_PROJECT |UNPARTITIONED|
 subplan {
 aggregate [$$50] <- [agg-sql-sum($$53)]
 -- AGGREGATE |LOCAL|
 aggregate [$$53] <- [agg-sql-count($$43)]
 -- AGGREGATE |LOCAL|
 distinct ([$$43])
 -- MICRO_PRE_SORTED_DISTINCT_BY |LOCAL|
 order (ASC, $$43)
 -- IN_MEMORY_STABLE_SORT [$$43(ASC)] |LOCAL|
 assign [$$43] <- [$$52.getField("oddOnePercent")]
 -- ASSIGN |UNPARTITIONED|
 assign [$$52] <- [$#4.getField(0)]
 -- ASSIGN |UNPARTITIONED|
 unnest $#4 <- scan-collection($$28)
 -- UNNEST |UNPARTITIONED|
 nested tuple source
 -- NESTED_TUPLE_SOURCE |UNPARTITIONED|
 }
 -- SUBPLAN |UNPARTITIONED|
 aggregate [$$28, $$48, $$49] <- [listify($$27), agg-sql-min($$33), agg-sql-max($$33)]
 -- AGGREGATE |UNPARTITIONED|
 exchange
 -- RANDOM_MERGE_EXCHANGE |PARTITIONED|
 project ([$$27, $$33])
 -- STREAM_PROJECT |PARTITIONED|
 assign [$$33, $$27] <- [$$t.getField("oddOnePercent"), {"t": $$t}]
 -- ASSIGN |PARTITIONED|
 project ([$$t])
 -- STREAM_PROJECT |PARTITIONED|
 exchange
 -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
 data-scan []<-[$$47, $$t] <- benchmark.wisconsin_5gb
 -- DATASOURCE_SCAN |PARTITIONED|
 exchange
 -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
 empty-tuple-source
 -- EMPTY_TUPLE_SOURCE |PARTITIONED|

  was:
This is the schema for this query:

CREATE TYPE Test AS open{
 unique2: int64
};

CREATE DATASET wisconsin_1gb(Test)
 PRIMARY KEY unique2;



This is the query:
SELECT min( t.oddOnePercent) as min, max(t.oddOnePercent) as max, count(distinct t.oddOnePercent) as cnt
 FROM wisconsin_5gb t ;

 

The plan for this query:
distribute result [$$46]
-- DISTRIBUTE_RESULT |UNPARTITIONED|
 exchange
 -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
 project ([$$46])
 -- STREAM_PROJECT |UNPARTITIONED|
 assign [$$46] <- [\{"min": $$48, "max": $$49, "cnt": $$50}]
 -- ASSIGN |UNPARTITIONED|
 project ([$$48, $$49, $$50])
 -- STREAM_PROJECT |UNPARTITIONED|
 subplan {
 aggregate [$$50] <- [agg-sql-sum($$53)]
 -- AGGREGATE |LOCAL|
 aggregate [$$53] <- [agg-sql-count($$43)]
 -- AGGREGATE |LOCAL|
 distinct ([$$43])
 -- MICRO_PRE_SORTED_DISTINCT_BY |LOCAL|
 order (ASC, $$43) 
 -- IN_MEMORY_STABLE_SORT [$$43(ASC)] |LOCAL|
 assign [$$43] <- [$$52.getField("oddOnePercent")]
 -- ASSIGN |UNPARTITIONED|
 assign [$$52] <- [$#4.getField(0)]
 -- ASSIGN |UNPARTITIONED|
 unnest $#4 <- scan-collection($$28)
 -- UNNEST |UNPARTITIONED|
 nested tuple source
 -- NESTED_TUPLE_SOURCE |UNPARTITIONED|
 }
 -- SUBPLAN |UNPARTITIONED|
 aggregate [$$28, $$48, $$49] <- [listify($$27), agg-sql-min($$33), agg-sql-max($$33)]
 -- AGGREGATE |UNPARTITIONED|
 exchange
 -- RANDOM_MERGE_EXCHANGE |PARTITIONED|
 project ([$$27, $$33])
 -- STREAM_PROJECT |PARTITIONED|
 assign [$$33, $$27] <- [$$t.getField("oddOnePercent"), \{"t": $$t}]
 -- ASSIGN |PARTITIONED|
 project ([$$t])
 -- STREAM_PROJECT |PARTITIONED|
 exchange
 -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
 data-scan []<-[$$47, $$t] <- benchmark.wisconsin_5gb
 -- DATASOURCE_SCAN |PARTITIONED|
 exchange
 -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
 empty-tuple-source
 -- EMPTY_TUPLE_SOURCE |PARTITIONED|


> Out of Memory error doing aggregation
> -------------------------------------
>
>                 Key: ASTERIXDB-2481
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-2481
>             Project: Apache AsterixDB
>          Issue Type: Bug
>          Components: COMP - Compiler, RT - Runtime, SQL - Translator SQL++
>    Affects Versions: 0.9.5
>         Environment: Linux
>            Reporter: Gift Sinthong
>            Priority: Critical
>         Attachments: Screen Shot 2018-11-14 at 3.12.31 PM.png
>
>
> This is the schema:
> CREATE TYPE Test AS open {
>  unique2: int64
> };
> CREATE DATASET wisconsin_5gb(Test)
>  PRIMARY KEY unique2;
> This is the query:
>  SELECT min( t.oddOnePercent) as min, max(t.oddOnePercent) as max, count(distinct t.oddOnePercent) as cnt
>  FROM wisconsin_5gb t ;
>  
> The plan for this query:
>  distribute result [$$46]
>  -- DISTRIBUTE_RESULT |UNPARTITIONED|
>  exchange
>  -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
>  project ([$$46])
>  -- STREAM_PROJECT |UNPARTITIONED|
>  assign [$$46] <- [{"min": $$48, "max": $$49, "cnt": $$50}]
>  -- ASSIGN |UNPARTITIONED|
>  project ([$$48, $$49, $$50])
>  -- STREAM_PROJECT |UNPARTITIONED|
>  subplan {
>  aggregate [$$50] <- [agg-sql-sum($$53)]
>  -- AGGREGATE |LOCAL|
>  aggregate [$$53] <- [agg-sql-count($$43)]
>  -- AGGREGATE |LOCAL|
>  distinct ([$$43])
>  -- MICRO_PRE_SORTED_DISTINCT_BY |LOCAL|
>  order (ASC, $$43)
>  -- IN_MEMORY_STABLE_SORT [$$43(ASC)] |LOCAL|
>  assign [$$43] <- [$$52.getField("oddOnePercent")]
>  -- ASSIGN |UNPARTITIONED|
>  assign [$$52] <- [$#4.getField(0)]
>  -- ASSIGN |UNPARTITIONED|
>  unnest $#4 <- scan-collection($$28)
>  -- UNNEST |UNPARTITIONED|
>  nested tuple source
>  -- NESTED_TUPLE_SOURCE |UNPARTITIONED|
>  }
>  -- SUBPLAN |UNPARTITIONED|
>  aggregate [$$28, $$48, $$49] <- [listify($$27), agg-sql-min($$33), agg-sql-max($$33)]
>  -- AGGREGATE |UNPARTITIONED|
>  exchange
>  -- RANDOM_MERGE_EXCHANGE |PARTITIONED|
>  project ([$$27, $$33])
>  -- STREAM_PROJECT |PARTITIONED|
>  assign [$$33, $$27] <- [$$t.getField("oddOnePercent"), {"t": $$t}]
>  -- ASSIGN |PARTITIONED|
>  project ([$$t])
>  -- STREAM_PROJECT |PARTITIONED|
>  exchange
>  -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
>  data-scan []<-[$$47, $$t] <- benchmark.wisconsin_5gb
>  -- DATASOURCE_SCAN |PARTITIONED|
>  exchange
>  -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
>  empty-tuple-source
>  -- EMPTY_TUPLE_SOURCE |PARTITIONED|



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)