You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@calcite.apache.org by "Julian Hyde (Jira)" <ji...@apache.org> on 2023/02/23 17:40:00 UTC

[jira] [Updated] (CALCITE-5540) Wrong plan for query with COUNT(DISTINCT subQuery)

     [ https://issues.apache.org/jira/browse/CALCITE-5540?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Julian Hyde updated CALCITE-5540:
---------------------------------
    Description: 
A query with two "COUNT(DISTINCT (SELECT ...))" aggregate functions generates an incorrect or perhaps inefficient plan when SqlToRelConverter.expand=false.

The query occurs in LatticeSuggesterTest (enabled only with slow tests) and the error is discussed in CALCITE-5537.

[~peng713128] wrote:

I think output is wrong. And I prefer to set expansion to true.
One edge of LatticeSpace graph is missed when the subquery is not expanded.
The missing case is as follow, the relationship of warehouse#warehouse_class_id and warehouse_class#warehouse_class_id is missing.
I think it should add to the graph.

foodmart query_id:6739
{code:java}
  select
  "store"."store_type" as "c0",
  count(
    distinct (
      select
        "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
      from
        "warehouse_class" AS "warehouse_class"
      where
        "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
        and "warehouse_class"."description" = 'Large Owned'
    )
  ) as "m0",
  count(
    distinct (
      select
        "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
      from
        "warehouse_class" AS "warehouse_class"
      where
        "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
        and "warehouse_class"."description" = 'Large Independent'
    )
  ) as "m1",
  count(
    (
      select
        "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
      from
        "warehouse_class" AS "warehouse_class"
      where
        "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
        and "warehouse_class"."description" = 'Large Independent'
    )
  ) as "m2",
  count(distinct "store_id" + "warehouse_id") as "m3",
  count("store_id" + "warehouse_id") as "m4",
  count("warehouse"."stores_id") as "m5"
from
  "store" as "store",
  "warehouse" as "warehouse"
where
  "warehouse"."stores_id" = "store"."store_id"
group by
  "store"."store_type"{code}
when set SqlToRelConverter.Config.expand = true
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
  LogicalProject(c0=[$1], $f0=[$39], $f00=[$40], $f01=[$42], $f4=[+($0, $24)], stores_id=[$26])
    LogicalJoin(condition=[=($25, $41)], joinType=[left])
      LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$39], $f040=[$41])
        LogicalJoin(condition=[=($25, $40)], joinType=[left])
          LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$40])
            LogicalJoin(condition=[=($25, $39)], joinType=[left])
              LogicalJoin(condition=[=($26, $0)], joinType=[inner])
                JdbcTableScan(table=[[foodmart, store]])
                JdbcTableScan(table=[[foodmart, warehouse]])
              LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
                LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
                  LogicalFilter(condition=[=($1, 'Large Owned')])
                    JdbcTableScan(table=[[foodmart, warehouse_class]])
          LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
            LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
              LogicalFilter(condition=[=($1, 'Large Independent')])
                JdbcTableScan(table=[[foodmart, warehouse_class]])
      LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
        LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
          LogicalFilter(condition=[=($1, 'Large Independent')])
            JdbcTableScan(table=[[foodmart, warehouse_class]]){code}
when set SqlToRelConverter.Config.expand = false
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
  LogicalProject(c0=[$1], $f1=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
  LogicalFilter(condition=[AND(=($0, $cor0.warehouse_class_id), =($1, 'Large Owned'))])
    JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f2=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
  LogicalFilter(condition=[AND(=($0, $cor1.warehouse_class_id), =($1, 'Large Independent'))])
    JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f3=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
  LogicalFilter(condition=[AND(=($0, $cor2.warehouse_class_id), =($1, 'Large Independent'))])
    JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f4=[+($0, $24)], stores_id=[$26])
    LogicalFilter(condition=[=($26, $0)])
      LogicalJoin(condition=[true], joinType=[inner])
        JdbcTableScan(table=[[foodmart, store]])
        JdbcTableScan(table=[[foodmart, warehouse]]){code}

  was:
A query with two "COUNT(DISTINCT (SELECT ...))" aggregate functions generates an incorrect or perhaps inefficient plan when SqlToRelConverter.expand=false.

The query occurs in LatticeSuggesterTest (enabled only with slow tests) and the error is discussed in CALCITE-5537.

I think output is wrong. And I prefer to set expansion to true.
One edge of LatticeSpace graph is missed when the subquery is not expanded.
The missing case is as follow, the relationship of warehouse#warehouse_class_id and warehouse_class#warehouse_class_id is missing.
I think it should add to the graph.

foodmart query_id:6739
{code:java}
  select
  "store"."store_type" as "c0",
  count(
    distinct (
      select
        "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
      from
        "warehouse_class" AS "warehouse_class"
      where
        "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
        and "warehouse_class"."description" = 'Large Owned'
    )
  ) as "m0",
  count(
    distinct (
      select
        "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
      from
        "warehouse_class" AS "warehouse_class"
      where
        "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
        and "warehouse_class"."description" = 'Large Independent'
    )
  ) as "m1",
  count(
    (
      select
        "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
      from
        "warehouse_class" AS "warehouse_class"
      where
        "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
        and "warehouse_class"."description" = 'Large Independent'
    )
  ) as "m2",
  count(distinct "store_id" + "warehouse_id") as "m3",
  count("store_id" + "warehouse_id") as "m4",
  count("warehouse"."stores_id") as "m5"
from
  "store" as "store",
  "warehouse" as "warehouse"
where
  "warehouse"."stores_id" = "store"."store_id"
group by
  "store"."store_type"{code}
when set SqlToRelConverter.Config.expand = true
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
  LogicalProject(c0=[$1], $f0=[$39], $f00=[$40], $f01=[$42], $f4=[+($0, $24)], stores_id=[$26])
    LogicalJoin(condition=[=($25, $41)], joinType=[left])
      LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$39], $f040=[$41])
        LogicalJoin(condition=[=($25, $40)], joinType=[left])
          LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$40])
            LogicalJoin(condition=[=($25, $39)], joinType=[left])
              LogicalJoin(condition=[=($26, $0)], joinType=[inner])
                JdbcTableScan(table=[[foodmart, store]])
                JdbcTableScan(table=[[foodmart, warehouse]])
              LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
                LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
                  LogicalFilter(condition=[=($1, 'Large Owned')])
                    JdbcTableScan(table=[[foodmart, warehouse_class]])
          LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
            LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
              LogicalFilter(condition=[=($1, 'Large Independent')])
                JdbcTableScan(table=[[foodmart, warehouse_class]])
      LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
        LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
          LogicalFilter(condition=[=($1, 'Large Independent')])
            JdbcTableScan(table=[[foodmart, warehouse_class]]){code}

when set SqlToRelConverter.Config.expand = false
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
  LogicalProject(c0=[$1], $f1=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
  LogicalFilter(condition=[AND(=($0, $cor0.warehouse_class_id), =($1, 'Large Owned'))])
    JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f2=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
  LogicalFilter(condition=[AND(=($0, $cor1.warehouse_class_id), =($1, 'Large Independent'))])
    JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f3=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
  LogicalFilter(condition=[AND(=($0, $cor2.warehouse_class_id), =($1, 'Large Independent'))])
    JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f4=[+($0, $24)], stores_id=[$26])
    LogicalFilter(condition=[=($26, $0)])
      LogicalJoin(condition=[true], joinType=[inner])
        JdbcTableScan(table=[[foodmart, store]])
        JdbcTableScan(table=[[foodmart, warehouse]]){code}


> Wrong plan for query with COUNT(DISTINCT subQuery)
> --------------------------------------------------
>
>                 Key: CALCITE-5540
>                 URL: https://issues.apache.org/jira/browse/CALCITE-5540
>             Project: Calcite
>          Issue Type: Bug
>            Reporter: Julian Hyde
>            Priority: Major
>
> A query with two "COUNT(DISTINCT (SELECT ...))" aggregate functions generates an incorrect or perhaps inefficient plan when SqlToRelConverter.expand=false.
> The query occurs in LatticeSuggesterTest (enabled only with slow tests) and the error is discussed in CALCITE-5537.
> [~peng713128] wrote:
> I think output is wrong. And I prefer to set expansion to true.
> One edge of LatticeSpace graph is missed when the subquery is not expanded.
> The missing case is as follow, the relationship of warehouse#warehouse_class_id and warehouse_class#warehouse_class_id is missing.
> I think it should add to the graph.
> foodmart query_id:6739
> {code:java}
>   select
>   "store"."store_type" as "c0",
>   count(
>     distinct (
>       select
>         "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
>       from
>         "warehouse_class" AS "warehouse_class"
>       where
>         "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
>         and "warehouse_class"."description" = 'Large Owned'
>     )
>   ) as "m0",
>   count(
>     distinct (
>       select
>         "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
>       from
>         "warehouse_class" AS "warehouse_class"
>       where
>         "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
>         and "warehouse_class"."description" = 'Large Independent'
>     )
>   ) as "m1",
>   count(
>     (
>       select
>         "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
>       from
>         "warehouse_class" AS "warehouse_class"
>       where
>         "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
>         and "warehouse_class"."description" = 'Large Independent'
>     )
>   ) as "m2",
>   count(distinct "store_id" + "warehouse_id") as "m3",
>   count("store_id" + "warehouse_id") as "m4",
>   count("warehouse"."stores_id") as "m5"
> from
>   "store" as "store",
>   "warehouse" as "warehouse"
> where
>   "warehouse"."stores_id" = "store"."store_id"
> group by
>   "store"."store_type"{code}
> when set SqlToRelConverter.Config.expand = true
> {code:java}
> LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
>   LogicalProject(c0=[$1], $f0=[$39], $f00=[$40], $f01=[$42], $f4=[+($0, $24)], stores_id=[$26])
>     LogicalJoin(condition=[=($25, $41)], joinType=[left])
>       LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$39], $f040=[$41])
>         LogicalJoin(condition=[=($25, $40)], joinType=[left])
>           LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$40])
>             LogicalJoin(condition=[=($25, $39)], joinType=[left])
>               LogicalJoin(condition=[=($26, $0)], joinType=[inner])
>                 JdbcTableScan(table=[[foodmart, store]])
>                 JdbcTableScan(table=[[foodmart, warehouse]])
>               LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
>                 LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
>                   LogicalFilter(condition=[=($1, 'Large Owned')])
>                     JdbcTableScan(table=[[foodmart, warehouse_class]])
>           LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
>             LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
>               LogicalFilter(condition=[=($1, 'Large Independent')])
>                 JdbcTableScan(table=[[foodmart, warehouse_class]])
>       LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
>         LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
>           LogicalFilter(condition=[=($1, 'Large Independent')])
>             JdbcTableScan(table=[[foodmart, warehouse_class]]){code}
> when set SqlToRelConverter.Config.expand = false
> {code:java}
> LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
>   LogicalProject(c0=[$1], $f1=[$SCALAR_QUERY({
> LogicalProject(warehouse_class_id=[$0])
>   LogicalFilter(condition=[AND(=($0, $cor0.warehouse_class_id), =($1, 'Large Owned'))])
>     JdbcTableScan(table=[[foodmart, warehouse_class]])
> })], $f2=[$SCALAR_QUERY({
> LogicalProject(warehouse_class_id=[$0])
>   LogicalFilter(condition=[AND(=($0, $cor1.warehouse_class_id), =($1, 'Large Independent'))])
>     JdbcTableScan(table=[[foodmart, warehouse_class]])
> })], $f3=[$SCALAR_QUERY({
> LogicalProject(warehouse_class_id=[$0])
>   LogicalFilter(condition=[AND(=($0, $cor2.warehouse_class_id), =($1, 'Large Independent'))])
>     JdbcTableScan(table=[[foodmart, warehouse_class]])
> })], $f4=[+($0, $24)], stores_id=[$26])
>     LogicalFilter(condition=[=($26, $0)])
>       LogicalJoin(condition=[true], joinType=[inner])
>         JdbcTableScan(table=[[foodmart, store]])
>         JdbcTableScan(table=[[foodmart, warehouse]]){code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)