You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@calcite.apache.org by "Julian Hyde (Jira)" <ji...@apache.org> on 2023/02/23 17:40:00 UTC
[jira] [Updated] (CALCITE-5540) Wrong plan for query with COUNT(DISTINCT subQuery)
[ https://issues.apache.org/jira/browse/CALCITE-5540?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Julian Hyde updated CALCITE-5540:
---------------------------------
Description:
A query with two "COUNT(DISTINCT (SELECT ...))" aggregate functions generates an incorrect or perhaps inefficient plan when SqlToRelConverter.expand=false.
The query occurs in LatticeSuggesterTest (enabled only with slow tests) and the error is discussed in CALCITE-5537.
[~peng713128] wrote:
I think output is wrong. And I prefer to set expansion to true.
One edge of LatticeSpace graph is missed when the subquery is not expanded.
The missing case is as follow, the relationship of warehouse#warehouse_class_id and warehouse_class#warehouse_class_id is missing.
I think it should add to the graph.
foodmart query_id:6739
{code:java}
select
"store"."store_type" as "c0",
count(
distinct (
select
"warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
from
"warehouse_class" AS "warehouse_class"
where
"warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
and "warehouse_class"."description" = 'Large Owned'
)
) as "m0",
count(
distinct (
select
"warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
from
"warehouse_class" AS "warehouse_class"
where
"warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
and "warehouse_class"."description" = 'Large Independent'
)
) as "m1",
count(
(
select
"warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
from
"warehouse_class" AS "warehouse_class"
where
"warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
and "warehouse_class"."description" = 'Large Independent'
)
) as "m2",
count(distinct "store_id" + "warehouse_id") as "m3",
count("store_id" + "warehouse_id") as "m4",
count("warehouse"."stores_id") as "m5"
from
"store" as "store",
"warehouse" as "warehouse"
where
"warehouse"."stores_id" = "store"."store_id"
group by
"store"."store_type"{code}
when set SqlToRelConverter.Config.expand = true
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
LogicalProject(c0=[$1], $f0=[$39], $f00=[$40], $f01=[$42], $f4=[+($0, $24)], stores_id=[$26])
LogicalJoin(condition=[=($25, $41)], joinType=[left])
LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$39], $f040=[$41])
LogicalJoin(condition=[=($25, $40)], joinType=[left])
LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$40])
LogicalJoin(condition=[=($25, $39)], joinType=[left])
LogicalJoin(condition=[=($26, $0)], joinType=[inner])
JdbcTableScan(table=[[foodmart, store]])
JdbcTableScan(table=[[foodmart, warehouse]])
LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
LogicalFilter(condition=[=($1, 'Large Owned')])
JdbcTableScan(table=[[foodmart, warehouse_class]])
LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
LogicalFilter(condition=[=($1, 'Large Independent')])
JdbcTableScan(table=[[foodmart, warehouse_class]])
LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
LogicalFilter(condition=[=($1, 'Large Independent')])
JdbcTableScan(table=[[foodmart, warehouse_class]]){code}
when set SqlToRelConverter.Config.expand = false
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
LogicalProject(c0=[$1], $f1=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
LogicalFilter(condition=[AND(=($0, $cor0.warehouse_class_id), =($1, 'Large Owned'))])
JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f2=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
LogicalFilter(condition=[AND(=($0, $cor1.warehouse_class_id), =($1, 'Large Independent'))])
JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f3=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
LogicalFilter(condition=[AND(=($0, $cor2.warehouse_class_id), =($1, 'Large Independent'))])
JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f4=[+($0, $24)], stores_id=[$26])
LogicalFilter(condition=[=($26, $0)])
LogicalJoin(condition=[true], joinType=[inner])
JdbcTableScan(table=[[foodmart, store]])
JdbcTableScan(table=[[foodmart, warehouse]]){code}
was:
A query with two "COUNT(DISTINCT (SELECT ...))" aggregate functions generates an incorrect or perhaps inefficient plan when SqlToRelConverter.expand=false.
The query occurs in LatticeSuggesterTest (enabled only with slow tests) and the error is discussed in CALCITE-5537.
I think output is wrong. And I prefer to set expansion to true.
One edge of LatticeSpace graph is missed when the subquery is not expanded.
The missing case is as follow, the relationship of warehouse#warehouse_class_id and warehouse_class#warehouse_class_id is missing.
I think it should add to the graph.
foodmart query_id:6739
{code:java}
select
"store"."store_type" as "c0",
count(
distinct (
select
"warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
from
"warehouse_class" AS "warehouse_class"
where
"warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
and "warehouse_class"."description" = 'Large Owned'
)
) as "m0",
count(
distinct (
select
"warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
from
"warehouse_class" AS "warehouse_class"
where
"warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
and "warehouse_class"."description" = 'Large Independent'
)
) as "m1",
count(
(
select
"warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
from
"warehouse_class" AS "warehouse_class"
where
"warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
and "warehouse_class"."description" = 'Large Independent'
)
) as "m2",
count(distinct "store_id" + "warehouse_id") as "m3",
count("store_id" + "warehouse_id") as "m4",
count("warehouse"."stores_id") as "m5"
from
"store" as "store",
"warehouse" as "warehouse"
where
"warehouse"."stores_id" = "store"."store_id"
group by
"store"."store_type"{code}
when set SqlToRelConverter.Config.expand = true
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
LogicalProject(c0=[$1], $f0=[$39], $f00=[$40], $f01=[$42], $f4=[+($0, $24)], stores_id=[$26])
LogicalJoin(condition=[=($25, $41)], joinType=[left])
LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$39], $f040=[$41])
LogicalJoin(condition=[=($25, $40)], joinType=[left])
LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$40])
LogicalJoin(condition=[=($25, $39)], joinType=[left])
LogicalJoin(condition=[=($26, $0)], joinType=[inner])
JdbcTableScan(table=[[foodmart, store]])
JdbcTableScan(table=[[foodmart, warehouse]])
LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
LogicalFilter(condition=[=($1, 'Large Owned')])
JdbcTableScan(table=[[foodmart, warehouse_class]])
LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
LogicalFilter(condition=[=($1, 'Large Independent')])
JdbcTableScan(table=[[foodmart, warehouse_class]])
LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
LogicalFilter(condition=[=($1, 'Large Independent')])
JdbcTableScan(table=[[foodmart, warehouse_class]]){code}
when set SqlToRelConverter.Config.expand = false
{code:java}
LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
LogicalProject(c0=[$1], $f1=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
LogicalFilter(condition=[AND(=($0, $cor0.warehouse_class_id), =($1, 'Large Owned'))])
JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f2=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
LogicalFilter(condition=[AND(=($0, $cor1.warehouse_class_id), =($1, 'Large Independent'))])
JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f3=[$SCALAR_QUERY({
LogicalProject(warehouse_class_id=[$0])
LogicalFilter(condition=[AND(=($0, $cor2.warehouse_class_id), =($1, 'Large Independent'))])
JdbcTableScan(table=[[foodmart, warehouse_class]])
})], $f4=[+($0, $24)], stores_id=[$26])
LogicalFilter(condition=[=($26, $0)])
LogicalJoin(condition=[true], joinType=[inner])
JdbcTableScan(table=[[foodmart, store]])
JdbcTableScan(table=[[foodmart, warehouse]]){code}
> Wrong plan for query with COUNT(DISTINCT subQuery)
> --------------------------------------------------
>
> Key: CALCITE-5540
> URL: https://issues.apache.org/jira/browse/CALCITE-5540
> Project: Calcite
> Issue Type: Bug
> Reporter: Julian Hyde
> Priority: Major
>
> A query with two "COUNT(DISTINCT (SELECT ...))" aggregate functions generates an incorrect or perhaps inefficient plan when SqlToRelConverter.expand=false.
> The query occurs in LatticeSuggesterTest (enabled only with slow tests) and the error is discussed in CALCITE-5537.
> [~peng713128] wrote:
> I think output is wrong. And I prefer to set expansion to true.
> One edge of LatticeSpace graph is missed when the subquery is not expanded.
> The missing case is as follow, the relationship of warehouse#warehouse_class_id and warehouse_class#warehouse_class_id is missing.
> I think it should add to the graph.
> foodmart query_id:6739
> {code:java}
> select
> "store"."store_type" as "c0",
> count(
> distinct (
> select
> "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
> from
> "warehouse_class" AS "warehouse_class"
> where
> "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
> and "warehouse_class"."description" = 'Large Owned'
> )
> ) as "m0",
> count(
> distinct (
> select
> "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
> from
> "warehouse_class" AS "warehouse_class"
> where
> "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
> and "warehouse_class"."description" = 'Large Independent'
> )
> ) as "m1",
> count(
> (
> select
> "warehouse_class"."warehouse_class_id" AS "warehouse_class_id"
> from
> "warehouse_class" AS "warehouse_class"
> where
> "warehouse_class"."warehouse_class_id" = "warehouse"."warehouse_class_id"
> and "warehouse_class"."description" = 'Large Independent'
> )
> ) as "m2",
> count(distinct "store_id" + "warehouse_id") as "m3",
> count("store_id" + "warehouse_id") as "m4",
> count("warehouse"."stores_id") as "m5"
> from
> "store" as "store",
> "warehouse" as "warehouse"
> where
> "warehouse"."stores_id" = "store"."store_id"
> group by
> "store"."store_type"{code}
> when set SqlToRelConverter.Config.expand = true
> {code:java}
> LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
> LogicalProject(c0=[$1], $f0=[$39], $f00=[$40], $f01=[$42], $f4=[+($0, $24)], stores_id=[$26])
> LogicalJoin(condition=[=($25, $41)], joinType=[left])
> LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$39], $f040=[$41])
> LogicalJoin(condition=[=($25, $40)], joinType=[left])
> LogicalProject(store_id=[$0], store_type=[$1], region_id=[$2], store_name=[$3], store_number=[$4], store_street_address=[$5], store_city=[$6], store_state=[$7], store_postal_code=[$8], store_country=[$9], store_manager=[$10], store_phone=[$11], store_fax=[$12], first_opened_date=[$13], last_remodel_date=[$14], store_sqft=[$15], grocery_sqft=[$16], frozen_sqft=[$17], meat_sqft=[$18], coffee_bar=[$19], video_store=[$20], salad_bar=[$21], prepared_food=[$22], florist=[$23], warehouse_id=[$24], warehouse_class_id=[$25], stores_id=[$26], warehouse_name=[$27], wa_address1=[$28], wa_address2=[$29], wa_address3=[$30], wa_address4=[$31], warehouse_city=[$32], warehouse_state_province=[$33], warehouse_postal_code=[$34], warehouse_country=[$35], warehouse_owner_name=[$36], warehouse_phone=[$37], warehouse_fax=[$38], $f0=[$40])
> LogicalJoin(condition=[=($25, $39)], joinType=[left])
> LogicalJoin(condition=[=($26, $0)], joinType=[inner])
> JdbcTableScan(table=[[foodmart, store]])
> JdbcTableScan(table=[[foodmart, warehouse]])
> LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
> LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
> LogicalFilter(condition=[=($1, 'Large Owned')])
> JdbcTableScan(table=[[foodmart, warehouse_class]])
> LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
> LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
> LogicalFilter(condition=[=($1, 'Large Independent')])
> JdbcTableScan(table=[[foodmart, warehouse_class]])
> LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
> LogicalProject(warehouse_class_id1=[$0], warehouse_class_id=[$0])
> LogicalFilter(condition=[=($1, 'Large Independent')])
> JdbcTableScan(table=[[foodmart, warehouse_class]]){code}
> when set SqlToRelConverter.Config.expand = false
> {code:java}
> LogicalAggregate(group=[{0}], m0=[COUNT(DISTINCT $1)], m1=[COUNT(DISTINCT $2)], m2=[COUNT($3)], m3=[COUNT(DISTINCT $4)], m4=[COUNT()], m5=[COUNT($5)])
> LogicalProject(c0=[$1], $f1=[$SCALAR_QUERY({
> LogicalProject(warehouse_class_id=[$0])
> LogicalFilter(condition=[AND(=($0, $cor0.warehouse_class_id), =($1, 'Large Owned'))])
> JdbcTableScan(table=[[foodmart, warehouse_class]])
> })], $f2=[$SCALAR_QUERY({
> LogicalProject(warehouse_class_id=[$0])
> LogicalFilter(condition=[AND(=($0, $cor1.warehouse_class_id), =($1, 'Large Independent'))])
> JdbcTableScan(table=[[foodmart, warehouse_class]])
> })], $f3=[$SCALAR_QUERY({
> LogicalProject(warehouse_class_id=[$0])
> LogicalFilter(condition=[AND(=($0, $cor2.warehouse_class_id), =($1, 'Large Independent'))])
> JdbcTableScan(table=[[foodmart, warehouse_class]])
> })], $f4=[+($0, $24)], stores_id=[$26])
> LogicalFilter(condition=[=($26, $0)])
> LogicalJoin(condition=[true], joinType=[inner])
> JdbcTableScan(table=[[foodmart, store]])
> JdbcTableScan(table=[[foodmart, warehouse]]){code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)