You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/04/26 12:01:43 UTC

[doris] branch master updated: [opt](Nereids) forbid some bad pattern aggregate in AggregateStrategy (#18877)

This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e83d0d9b6a [opt](Nereids) forbid some bad pattern aggregate in AggregateStrategy (#18877)
e83d0d9b6a is described below

commit e83d0d9b6afbe3d4554b929d9e1d50dd5353c55c
Author: morrySnow <10...@users.noreply.github.com>
AuthorDate: Wed Apr 26 20:01:35 2023 +0800

    [opt](Nereids) forbid some bad pattern aggregate in AggregateStrategy (#18877)
    
    since we cannot do stats derive and cost estimate on agg very good.
    this PR remove some aggregate pattern that usually not good.
    1. one stage agg after exchange. this pattern is good only when process very few rows.
    2. three stage distinct agg with gather middle merge.
---
 .../properties/ChildrenPropertiesRegulator.java    |  5 +++
 .../rules/implementation/AggregateStrategies.java  |  3 +-
 .../data/nereids_tpch_shape_sf1_p0/shape/q19.out   | 19 +++++-----
 .../data/nereids_tpch_shape_sf1_p0/shape/q21.out   | 41 +++++++++++-----------
 .../data/nereids_tpch_shape_sf500_p0/shape/q21.out | 41 +++++++++++-----------
 .../nereids_function_p0/agg_function/agg.groovy    | 10 +++---
 .../sub_query_count_with_const.groovy              | 11 +-----
 7 files changed, 66 insertions(+), 64 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
index 35d4b367a0..bd73a33cf1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java
@@ -24,6 +24,7 @@ import org.apache.doris.nereids.jobs.JobContext;
 import org.apache.doris.nereids.memo.GroupExpression;
 import org.apache.doris.nereids.properties.DistributionSpecHash.ShuffleType;
 import org.apache.doris.nereids.trees.expressions.ExprId;
+import org.apache.doris.nereids.trees.plans.AggMode;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalDistribute;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalHashAggregate;
@@ -77,6 +78,10 @@ public class ChildrenPropertiesRegulator extends PlanVisitor<Double, Void> {
 
     @Override
     public Double visitPhysicalHashAggregate(PhysicalHashAggregate<? extends Plan> agg, Void context) {
+        if (agg.getAggMode() == AggMode.INPUT_TO_RESULT
+                && children.get(0).getPlan() instanceof PhysicalDistribute) {
+            return -1.0;
+        }
         return 0.0;
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index be914f938e..50bbbc0b20 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -980,7 +980,8 @@ public class AggregateStrategies implements ImplementationRuleFactory {
 
         if (logicalAgg.getGroupByExpressions().isEmpty()) {
             return ImmutableList.<PhysicalHashAggregate<? extends Plan>>builder()
-                    .add(anyLocalGatherGlobalGatherDistinctAgg)
+                    // TODO: this plan pattern is not good usually, we remove it temporary.
+                    // .add(anyLocalGatherGlobalGatherDistinctAgg)
                     .add(anyLocalHashGlobalGatherDistinctAgg)
                     .build();
         } else {
diff --git a/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q19.out b/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q19.out
index f53d475bb5..1b877ad328 100644
--- a/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q19.out
+++ b/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q19.out
@@ -1,14 +1,15 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
 -- !select --
-hashAgg[LOCAL]
+hashAgg[GLOBAL]
 --PhysicalDistribute
-----PhysicalProject
-------hashJoin[INNER_JOIN](part.p_partkey = lineitem.l_partkey)((((((part.p_brand = 'Brand#12') AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')) AND ((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00))) AND (part.p_size <= 5)) OR ((((part.p_brand = 'Brand#23') AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')) AND ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) AND (part.p_size <= 10))) OR ((((part.p_brand = 'Brand#34') AND  [...]
---------PhysicalProject
-----------filter((lineitem.l_shipinstruct = 'DELIVER IN PERSON')((((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00)) OR ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) OR ((lineitem.l_quantity >= 20.00) AND (lineitem.l_quantity <= 30.00)))((lineitem.l_shipmode = 'AIR') OR (lineitem.l_shipmode = 'AIR REG')))
-------------PhysicalOlapScan[lineitem]
---------PhysicalDistribute
+----hashAgg[LOCAL]
+------PhysicalProject
+--------hashJoin[INNER_JOIN](part.p_partkey = lineitem.l_partkey)((((((part.p_brand = 'Brand#12') AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')) AND ((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00))) AND (part.p_size <= 5)) OR ((((part.p_brand = 'Brand#23') AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')) AND ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) AND (part.p_size <= 10))) OR ((((part.p_brand = 'Brand#34') AN [...]
 ----------PhysicalProject
-------------filter((((((part.p_brand = 'Brand#12') AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')) AND (part.p_size <= 5)) OR (((part.p_brand = 'Brand#23') AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')) AND (part.p_size <= 10))) OR (((part.p_brand = 'Brand#34') AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')) AND (part.p_size <= 15)))(part.p_size >= 1))
---------------PhysicalOlapScan[part]
+------------filter((lineitem.l_shipinstruct = 'DELIVER IN PERSON')((((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00)) OR ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) OR ((lineitem.l_quantity >= 20.00) AND (lineitem.l_quantity <= 30.00)))((lineitem.l_shipmode = 'AIR') OR (lineitem.l_shipmode = 'AIR REG')))
+--------------PhysicalOlapScan[lineitem]
+----------PhysicalDistribute
+------------PhysicalProject
+--------------filter((((((part.p_brand = 'Brand#12') AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')) AND (part.p_size <= 5)) OR (((part.p_brand = 'Brand#23') AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')) AND (part.p_size <= 10))) OR (((part.p_brand = 'Brand#34') AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')) AND (part.p_size <= 15)))(part.p_size >= 1))
+----------------PhysicalOlapScan[part]
 
diff --git a/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q21.out b/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q21.out
index 1e26a0f353..1ebbb4a839 100644
--- a/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q21.out
+++ b/regression-test/data/nereids_tpch_shape_sf1_p0/shape/q21.out
@@ -3,30 +3,31 @@
 PhysicalTopN
 --PhysicalDistribute
 ----PhysicalTopN
-------hashAgg[LOCAL]
+------hashAgg[GLOBAL]
 --------PhysicalDistribute
-----------PhysicalProject
-------------hashJoin[RIGHT_SEMI_JOIN](l2.l_orderkey = l1.l_orderkey)( not (l_suppkey = l_suppkey))
---------------PhysicalProject
-----------------PhysicalOlapScan[lineitem]
---------------hashJoin[INNER_JOIN](orders.o_orderkey = l1.l_orderkey)
+----------hashAgg[LOCAL]
+------------PhysicalProject
+--------------hashJoin[RIGHT_SEMI_JOIN](l2.l_orderkey = l1.l_orderkey)( not (l_suppkey = l_suppkey))
 ----------------PhysicalProject
-------------------filter((orders.o_orderstatus = 'F'))
---------------------PhysicalOlapScan[orders]
-----------------hashJoin[RIGHT_ANTI_JOIN](l3.l_orderkey = l1.l_orderkey)( not (l_suppkey = l_suppkey))
+------------------PhysicalOlapScan[lineitem]
+----------------hashJoin[INNER_JOIN](orders.o_orderkey = l1.l_orderkey)
 ------------------PhysicalProject
---------------------filter((l3.l_receiptdate > l3.l_commitdate))
-----------------------PhysicalOlapScan[lineitem]
-------------------hashJoin[INNER_JOIN](supplier.s_suppkey = l1.l_suppkey)
+--------------------filter((orders.o_orderstatus = 'F'))
+----------------------PhysicalOlapScan[orders]
+------------------hashJoin[RIGHT_ANTI_JOIN](l3.l_orderkey = l1.l_orderkey)( not (l_suppkey = l_suppkey))
 --------------------PhysicalProject
-----------------------filter((l1.l_receiptdate > l1.l_commitdate))
+----------------------filter((l3.l_receiptdate > l3.l_commitdate))
 ------------------------PhysicalOlapScan[lineitem]
---------------------PhysicalDistribute
-----------------------hashJoin[INNER_JOIN](supplier.s_nationkey = nation.n_nationkey)
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[supplier]
-------------------------PhysicalDistribute
+--------------------hashJoin[INNER_JOIN](supplier.s_suppkey = l1.l_suppkey)
+----------------------PhysicalProject
+------------------------filter((l1.l_receiptdate > l1.l_commitdate))
+--------------------------PhysicalOlapScan[lineitem]
+----------------------PhysicalDistribute
+------------------------hashJoin[INNER_JOIN](supplier.s_nationkey = nation.n_nationkey)
 --------------------------PhysicalProject
-----------------------------filter((nation.n_name = 'SAUDI ARABIA'))
-------------------------------PhysicalOlapScan[nation]
+----------------------------PhysicalOlapScan[supplier]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((nation.n_name = 'SAUDI ARABIA'))
+--------------------------------PhysicalOlapScan[nation]
 
diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q21.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q21.out
index 1e26a0f353..1ebbb4a839 100644
--- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q21.out
+++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q21.out
@@ -3,30 +3,31 @@
 PhysicalTopN
 --PhysicalDistribute
 ----PhysicalTopN
-------hashAgg[LOCAL]
+------hashAgg[GLOBAL]
 --------PhysicalDistribute
-----------PhysicalProject
-------------hashJoin[RIGHT_SEMI_JOIN](l2.l_orderkey = l1.l_orderkey)( not (l_suppkey = l_suppkey))
---------------PhysicalProject
-----------------PhysicalOlapScan[lineitem]
---------------hashJoin[INNER_JOIN](orders.o_orderkey = l1.l_orderkey)
+----------hashAgg[LOCAL]
+------------PhysicalProject
+--------------hashJoin[RIGHT_SEMI_JOIN](l2.l_orderkey = l1.l_orderkey)( not (l_suppkey = l_suppkey))
 ----------------PhysicalProject
-------------------filter((orders.o_orderstatus = 'F'))
---------------------PhysicalOlapScan[orders]
-----------------hashJoin[RIGHT_ANTI_JOIN](l3.l_orderkey = l1.l_orderkey)( not (l_suppkey = l_suppkey))
+------------------PhysicalOlapScan[lineitem]
+----------------hashJoin[INNER_JOIN](orders.o_orderkey = l1.l_orderkey)
 ------------------PhysicalProject
---------------------filter((l3.l_receiptdate > l3.l_commitdate))
-----------------------PhysicalOlapScan[lineitem]
-------------------hashJoin[INNER_JOIN](supplier.s_suppkey = l1.l_suppkey)
+--------------------filter((orders.o_orderstatus = 'F'))
+----------------------PhysicalOlapScan[orders]
+------------------hashJoin[RIGHT_ANTI_JOIN](l3.l_orderkey = l1.l_orderkey)( not (l_suppkey = l_suppkey))
 --------------------PhysicalProject
-----------------------filter((l1.l_receiptdate > l1.l_commitdate))
+----------------------filter((l3.l_receiptdate > l3.l_commitdate))
 ------------------------PhysicalOlapScan[lineitem]
---------------------PhysicalDistribute
-----------------------hashJoin[INNER_JOIN](supplier.s_nationkey = nation.n_nationkey)
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[supplier]
-------------------------PhysicalDistribute
+--------------------hashJoin[INNER_JOIN](supplier.s_suppkey = l1.l_suppkey)
+----------------------PhysicalProject
+------------------------filter((l1.l_receiptdate > l1.l_commitdate))
+--------------------------PhysicalOlapScan[lineitem]
+----------------------PhysicalDistribute
+------------------------hashJoin[INNER_JOIN](supplier.s_nationkey = nation.n_nationkey)
 --------------------------PhysicalProject
-----------------------------filter((nation.n_name = 'SAUDI ARABIA'))
-------------------------------PhysicalOlapScan[nation]
+----------------------------PhysicalOlapScan[supplier]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((nation.n_name = 'SAUDI ARABIA'))
+--------------------------------PhysicalOlapScan[nation]
 
diff --git a/regression-test/suites/nereids_function_p0/agg_function/agg.groovy b/regression-test/suites/nereids_function_p0/agg_function/agg.groovy
index 70a262d8fd..d156265d0d 100644
--- a/regression-test/suites/nereids_function_p0/agg_function/agg.groovy
+++ b/regression-test/suites/nereids_function_p0/agg_function/agg.groovy
@@ -1458,8 +1458,9 @@ suite("nereids_agg_fn") {
 	qt_sql_ndv_AnyData_agg_phase_4_notnull '''
 		select /*+SET_VAR(disable_nereids_rules='THREE_PHASE_AGGREGATE_WITH_DISTINCT, TWO_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct id), ndv(kint) from fn_test'''
 
-	qt_sql_orthogonal_bitmap_union_count_Bitmap_gb '''
-		select orthogonal_bitmap_union_count(bitmap_hash(kbint)) from fn_test group by kbool order by kbool'''
+    // this function result is unstable
+	// qt_sql_orthogonal_bitmap_union_count_Bitmap_gb '''
+	// 	select orthogonal_bitmap_union_count(bitmap_hash(kbint)) from fn_test group by kbool order by kbool'''
 	qt_sql_orthogonal_bitmap_union_count_Bitmap '''
 		select orthogonal_bitmap_union_count(bitmap_hash(kbint)) from fn_test'''
 	qt_sql_orthogonal_bitmap_union_count_Bitmap_agg_phase_1 '''
@@ -1470,8 +1471,9 @@ suite("nereids_agg_fn") {
 	// 	select /*+SET_VAR(disable_nereids_rules='THREE_PHASE_AGGREGATE_WITH_DISTINCT, TWO_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct id, kint), orthogonal_bitmap_union_count(bitmap_hash(kbint)) from fn_test group by kbool order by kbool'''
 	qt_sql_orthogonal_bitmap_union_count_Bitmap_agg_phase_4 '''
 		select /*+SET_VAR(disable_nereids_rules='THREE_PHASE_AGGREGATE_WITH_DISTINCT, TWO_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct id), orthogonal_bitmap_union_count(bitmap_hash(kbint)) from fn_test'''
-	qt_sql_orthogonal_bitmap_union_count_Bitmap_gb_notnull '''
-		select orthogonal_bitmap_union_count(bitmap_hash(kbint)) from fn_test_not_nullable group by kbool order by kbool'''
+    // this function result is unstable
+	// qt_sql_orthogonal_bitmap_union_count_Bitmap_gb_notnull '''
+	// 	select orthogonal_bitmap_union_count(bitmap_hash(kbint)) from fn_test_not_nullable group by kbool order by kbool'''
 	qt_sql_orthogonal_bitmap_union_count_Bitmap_notnull '''
 		select orthogonal_bitmap_union_count(bitmap_hash(kbint)) from fn_test_not_nullable'''
 	qt_sql_orthogonal_bitmap_union_count_Bitmap_agg_phase_1_notnull '''
diff --git a/regression-test/suites/nereids_syntax_p0/sub_query_count_with_const.groovy b/regression-test/suites/nereids_syntax_p0/sub_query_count_with_const.groovy
index ffe07644d5..02d6453aef 100644
--- a/regression-test/suites/nereids_syntax_p0/sub_query_count_with_const.groovy
+++ b/regression-test/suites/nereids_syntax_p0/sub_query_count_with_const.groovy
@@ -40,13 +40,4 @@ suite("sub_query_count_with_const") {
                        select 2022 as dt ,sum(id)
                        from sub_query_count_with_const
                  ) tmp;"""
-
-    explain {
-        sql ("""select count(1) as count
-                from (
-                      select 2022 as dt ,sum(id)
-                      from sub_query_count_with_const
-                ) tmp;""")
-        contains "output: sum(id[#0])[#1]"
-    }
-}
\ No newline at end of file
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org