You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ja...@apache.org on 2023/06/18 09:49:10 UTC

[doris] branch master updated: [fix](Nereids): MergeSetOperations can merge SetOperation ALL. (#20902)

This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ac3290021d [fix](Nereids): MergeSetOperations can merge SetOperation ALL. (#20902)
ac3290021d is described below

commit ac3290021dbdc5d55229c0a6218f7031c048bb68
Author: jakevin <ja...@gmail.com>
AuthorDate: Sun Jun 18 17:49:03 2023 +0800

    [fix](Nereids): MergeSetOperations can merge SetOperation ALL. (#20902)
---
 .../doris/nereids/jobs/executor/Rewriter.java      | 33 +++++----
 .../nereids/rules/rewrite/BuildAggForUnion.java    |  7 +-
 .../nereids/rules/rewrite/MergeSetOperations.java  |  6 +-
 .../nereids_tpcds_shape_sf100_p0/shape/query49.out | 84 +++++++++++-----------
 .../nereids_tpcds_shape_sf100_p0/shape/query75.out | 68 +++++++++---------
 5 files changed, 100 insertions(+), 98 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index 9697a8d007..ec8bfb6531 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -218,21 +218,28 @@ public class Rewriter extends AbstractBatchJobExecutor {
             // this rule should invoke after ColumnPruning
             custom(RuleType.ELIMINATE_UNNECESSARY_PROJECT, EliminateUnnecessaryProject::new),
 
-            // we need to execute this rule at the end of rewrite
-            // to avoid two consecutive same project appear when we do optimization.
             topic("Others optimization",
-                    bottomUp(ImmutableList.<RuleFactory>builder().addAll(ImmutableList.of(
-                            new EliminateNotNull(),
-                            new EliminateLimit(),
-                            new EliminateFilter(),
-                            new EliminateAggregate(),
-                            new MergeSetOperations(),
-                            new PushdownLimit(),
-                            new BuildAggForUnion()
-                            // after eliminate filter, the project maybe can push down again,
-                            // so we add push down rules
-                    )).addAll(RuleSet.PUSH_DOWN_FILTERS).build())
+                    bottomUp(ImmutableList.<RuleFactory>builder()
+                            .addAll(ImmutableList.of(
+                                    new EliminateNotNull(),
+                                    new EliminateLimit(),
+                                    new EliminateFilter(),
+                                    new EliminateAggregate(),
+                                    new PushdownLimit()
+                            ))
+                            // after eliminating some plan nodes, push-down may apply again, so add push down rules
+                            .add(new PushdownLimit())
+                            .addAll(RuleSet.PUSH_DOWN_FILTERS)
+                            .build()
+                    )
             ),
+
+            topic("Intersection optimization",
+                    // Run MergeSetOperations first, because we want to match the pattern of a distinct set operation.
+                    bottomUp(new MergeSetOperations()),
+                    bottomUp(new BuildAggForUnion())
+            ),
+
             topic("Window optimization",
                     topDown(
                             new PushdownLimit(),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/BuildAggForUnion.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/BuildAggForUnion.java
index 3e13860fd5..d1f4ba2d04 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/BuildAggForUnion.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/BuildAggForUnion.java
@@ -27,7 +27,12 @@ import com.google.common.collect.ImmutableList;
 import java.util.Optional;
 
 /**
- * For distinct union, add agg node.
+ * Convert Union into Agg + UnionAll.
+ * <pre>
+ *             Agg
+ * Union ->     |
+ *           UnionAll
+ * </pre>
  */
 public class BuildAggForUnion extends OneRewriteRuleFactory {
     @Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeSetOperations.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeSetOperations.java
index f0a72be315..9d17f00e88 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeSetOperations.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeSetOperations.java
@@ -48,7 +48,6 @@ public class MergeSetOperations implements RewriteRuleFactory {
     @Override
     public List<Rule> buildRules() {
         return ImmutableList.of(
-            RuleType.MERGE_SET_OPERATION.build(
                 logicalSetOperation(any(), any()).when(MergeSetOperations::canMerge).then(parentSetOperation -> {
                     List<Plan> newChildren = parentSetOperation.children()
                             .stream()
@@ -61,8 +60,7 @@ public class MergeSetOperations implements RewriteRuleFactory {
                             }).collect(ImmutableList.toImmutableList());
 
                     return parentSetOperation.withChildren(newChildren);
-                })
-            )
+                }).toRule(RuleType.MERGE_SET_OPERATION)
         );
     }
 
@@ -80,7 +78,7 @@ public class MergeSetOperations implements RewriteRuleFactory {
     }
 
     public static boolean isSameQualifierOrChildQualifierIsAll(LogicalSetOperation parentSetOperation,
-                                                         LogicalSetOperation childSetOperation) {
+            LogicalSetOperation childSetOperation) {
         return parentSetOperation.getQualifier() == childSetOperation.getQualifier()
                 || childSetOperation.getQualifier() == Qualifier.ALL;
     }
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out
index ec39065e1f..557f4a5e33 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out
@@ -7,64 +7,60 @@ PhysicalTopN
 --------PhysicalDistribute
 ----------hashAgg[LOCAL]
 ------------PhysicalUnion
---------------hashAgg[GLOBAL]
-----------------PhysicalDistribute
-------------------hashAgg[LOCAL]
---------------------PhysicalUnion
-----------------------PhysicalProject
-------------------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
+--------------PhysicalProject
+----------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
+------------------PhysicalWindow
+--------------------PhysicalQuickSort
+----------------------PhysicalDistribute
+------------------------PhysicalQuickSort
 --------------------------PhysicalWindow
 ----------------------------PhysicalQuickSort
 ------------------------------PhysicalDistribute
 --------------------------------PhysicalQuickSort
-----------------------------------PhysicalWindow
-------------------------------------PhysicalQuickSort
+----------------------------------PhysicalProject
+------------------------------------hashAgg[GLOBAL]
 --------------------------------------PhysicalDistribute
-----------------------------------------PhysicalQuickSort
+----------------------------------------hashAgg[LOCAL]
 ------------------------------------------PhysicalProject
---------------------------------------------hashAgg[GLOBAL]
-----------------------------------------------PhysicalDistribute
-------------------------------------------------hashAgg[LOCAL]
+--------------------------------------------hashJoin[INNER_JOIN](ws.ws_order_number = wr.wr_order_number)(item = wr.wr_item_sk)
+----------------------------------------------PhysicalProject
+------------------------------------------------filter((wr.wr_return_amt > 10000.00))
+--------------------------------------------------PhysicalOlapScan[web_returns]
+----------------------------------------------hashJoin[INNER_JOIN](ws.ws_sold_date_sk = date_dim.d_date_sk)
+------------------------------------------------PhysicalProject
+--------------------------------------------------filter((ws.ws_net_paid > 0.00)(ws.ws_quantity > 0)(ws.ws_net_profit > 1.00))
+----------------------------------------------------PhysicalOlapScan[web_sales]
+------------------------------------------------PhysicalDistribute
 --------------------------------------------------PhysicalProject
-----------------------------------------------------hashJoin[INNER_JOIN](ws.ws_order_number = wr.wr_order_number)(item = wr.wr_item_sk)
-------------------------------------------------------PhysicalProject
---------------------------------------------------------filter((wr.wr_return_amt > 10000.00))
-----------------------------------------------------------PhysicalOlapScan[web_returns]
-------------------------------------------------------hashJoin[INNER_JOIN](ws.ws_sold_date_sk = date_dim.d_date_sk)
---------------------------------------------------------PhysicalProject
-----------------------------------------------------------filter((ws.ws_net_paid > 0.00)(ws.ws_quantity > 0)(ws.ws_net_profit > 1.00))
-------------------------------------------------------------PhysicalOlapScan[web_sales]
---------------------------------------------------------PhysicalDistribute
-----------------------------------------------------------PhysicalProject
-------------------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999))
---------------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------PhysicalProject
-------------------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
+----------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999))
+------------------------------------------------------PhysicalOlapScan[date_dim]
+--------------PhysicalProject
+----------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
+------------------PhysicalWindow
+--------------------PhysicalQuickSort
+----------------------PhysicalDistribute
+------------------------PhysicalQuickSort
 --------------------------PhysicalWindow
 ----------------------------PhysicalQuickSort
 ------------------------------PhysicalDistribute
 --------------------------------PhysicalQuickSort
-----------------------------------PhysicalWindow
-------------------------------------PhysicalQuickSort
+----------------------------------PhysicalProject
+------------------------------------hashAgg[GLOBAL]
 --------------------------------------PhysicalDistribute
-----------------------------------------PhysicalQuickSort
+----------------------------------------hashAgg[LOCAL]
 ------------------------------------------PhysicalProject
---------------------------------------------hashAgg[GLOBAL]
-----------------------------------------------PhysicalDistribute
-------------------------------------------------hashAgg[LOCAL]
+--------------------------------------------hashJoin[INNER_JOIN](cs.cs_order_number = cr.cr_order_number)(item = cr.cr_item_sk)
+----------------------------------------------PhysicalProject
+------------------------------------------------filter((cr.cr_return_amount > 10000.00))
+--------------------------------------------------PhysicalOlapScan[catalog_returns]
+----------------------------------------------hashJoin[INNER_JOIN](cs.cs_sold_date_sk = date_dim.d_date_sk)
+------------------------------------------------PhysicalProject
+--------------------------------------------------filter((cs.cs_net_paid > 0.00)(cs.cs_quantity > 0)(cs.cs_net_profit > 1.00))
+----------------------------------------------------PhysicalOlapScan[catalog_sales]
+------------------------------------------------PhysicalDistribute
 --------------------------------------------------PhysicalProject
-----------------------------------------------------hashJoin[INNER_JOIN](cs.cs_order_number = cr.cr_order_number)(item = cr.cr_item_sk)
-------------------------------------------------------PhysicalProject
---------------------------------------------------------filter((cr.cr_return_amount > 10000.00))
-----------------------------------------------------------PhysicalOlapScan[catalog_returns]
-------------------------------------------------------hashJoin[INNER_JOIN](cs.cs_sold_date_sk = date_dim.d_date_sk)
---------------------------------------------------------PhysicalProject
-----------------------------------------------------------filter((cs.cs_net_paid > 0.00)(cs.cs_quantity > 0)(cs.cs_net_profit > 1.00))
-------------------------------------------------------------PhysicalOlapScan[catalog_sales]
---------------------------------------------------------PhysicalDistribute
-----------------------------------------------------------PhysicalProject
-------------------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999))
---------------------------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999))
+------------------------------------------------------PhysicalOlapScan[date_dim]
 --------------PhysicalProject
 ----------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
 ------------------PhysicalWindow
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out
index 5964454aa5..0a86b61e33 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out
@@ -7,44 +7,40 @@ CteAnchor[cteId= ( CTEId#3=] )
 --------PhysicalDistribute
 ----------hashAgg[LOCAL]
 ------------PhysicalUnion
---------------hashAgg[GLOBAL]
-----------------PhysicalDistribute
-------------------hashAgg[LOCAL]
---------------------PhysicalUnion
-----------------------PhysicalProject
-------------------------hashJoin[RIGHT_OUTER_JOIN](catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)(catalog_sales.cs_order_number = catalog_returns.cr_order_number)
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_returns]
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)
-------------------------------hashJoin[INNER_JOIN](item.i_item_sk = catalog_sales.cs_item_sk)
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[catalog_sales]
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((cast(i_category as VARCHAR(*)) = 'Home'))
---------------------------------------PhysicalOlapScan[item]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999)))
-------------------------------------PhysicalOlapScan[date_dim]
-----------------------PhysicalProject
-------------------------hashJoin[RIGHT_OUTER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)
+--------------PhysicalProject
+----------------hashJoin[RIGHT_OUTER_JOIN](catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)(catalog_sales.cs_order_number = catalog_returns.cr_order_number)
+------------------PhysicalProject
+--------------------PhysicalOlapScan[catalog_returns]
+------------------PhysicalProject
+--------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)
+----------------------hashJoin[INNER_JOIN](item.i_item_sk = catalog_sales.cs_item_sk)
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[catalog_sales]
+------------------------PhysicalDistribute
 --------------------------PhysicalProject
-----------------------------PhysicalOlapScan[store_returns]
+----------------------------filter((cast(i_category as VARCHAR(*)) = 'Home'))
+------------------------------PhysicalOlapScan[item]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999)))
+----------------------------PhysicalOlapScan[date_dim]
+--------------PhysicalProject
+----------------hashJoin[RIGHT_OUTER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)
+------------------PhysicalProject
+--------------------PhysicalOlapScan[store_returns]
+------------------PhysicalProject
+--------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = store_sales.ss_sold_date_sk)
+----------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[store_sales]
+------------------------PhysicalDistribute
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = store_sales.ss_sold_date_sk)
-------------------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[store_sales]
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((cast(i_category as VARCHAR(*)) = 'Home'))
---------------------------------------PhysicalOlapScan[item]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999)))
-------------------------------------PhysicalOlapScan[date_dim]
+----------------------------filter((cast(i_category as VARCHAR(*)) = 'Home'))
+------------------------------PhysicalOlapScan[item]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999)))
+----------------------------PhysicalOlapScan[date_dim]
 --------------PhysicalProject
 ----------------hashJoin[RIGHT_OUTER_JOIN](web_sales.ws_item_sk = web_returns.wr_item_sk)(web_sales.ws_order_number = web_returns.wr_order_number)
 ------------------PhysicalProject


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org