You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by en...@apache.org on 2023/06/19 15:40:51 UTC

[doris] branch master updated: [opt](Nereids) update join stats derive (#20895)

This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f20ef165fe [opt](Nereids) update join stats derive (#20895)
f20ef165fe is described below

commit f20ef165fe03b5b2d5fd6b7ee9d6668afbcb4784
Author: minghong <en...@gmail.com>
AuthorDate: Mon Jun 19 23:40:44 2023 +0800

    [opt](Nereids) update join stats derive (#20895)
    
    in hash join condition, some equals are trustable, some are not.
    an equal is trustable if one side is almost unique, like primary key. for such equal condition we could estimate more accurate.
    the problem is in rewriten q20, the are 2 equal condition, one is trustable, another is not. But we treat both of them as trustable.
    
    Test result:
    on tpch100, from 2.2 sec to 0.44 sec
    no impact on tpch other queries
    no performance impact on tpcds queries
---
 .../apache/doris/nereids/stats/JoinEstimation.java |   9 +-
 .../nereids_tpcds_shape_sf100_p0/shape/query24.out |  40 +++----
 .../nereids_tpcds_shape_sf100_p0/shape/query46.out |  14 +--
 .../nereids_tpcds_shape_sf100_p0/shape/query72.out |  85 +++++++-------
 .../nereids_tpcds_shape_sf100_p0/shape/query80.out | 123 ++++++++++-----------
 .../shape/q20-rewrite.out                          |  32 ++++++
 .../shape/q20-rewrite.groovy                       |  66 +++++++++++
 7 files changed, 235 insertions(+), 134 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index df36d84a4a..a45d2c6175 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -90,6 +90,7 @@ public class JoinEstimation {
          * In order to avoid error propagation, for unTrustEquations, we only use the biggest selectivity.
          */
         List<Double> unTrustEqualRatio = Lists.newArrayList();
+        List<EqualTo> unTrustableCondition = Lists.newArrayList();
         boolean leftBigger = leftStats.getRowCount() > rightStats.getRowCount();
         List<EqualTo> trustableConditions = join.getHashJoinConjuncts().stream()
                 .map(expression -> (EqualTo) expression)
@@ -115,6 +116,7 @@ public class JoinEstimation {
                                 unTrustEqualRatio.add((leftStatsRowCount / lNdv)
                                         * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / rNdv);
                             }
+                            unTrustableCondition.add(equal);
                         }
                         return trustable;
                     }
@@ -129,7 +131,7 @@ public class JoinEstimation {
 
         double outputRowCount = 1;
         if (!trustableConditions.isEmpty()) {
-            List<Pair<Expression, Double>> sortedJoinConditions = join.getHashJoinConjuncts().stream()
+            List<Pair<? extends Expression, Double>> sortedJoinConditions = trustableConditions.stream()
                     .map(expression -> Pair.of(expression, estimateJoinConditionSel(crossJoinStats, expression)))
                     .sorted((a, b) -> {
                         double sub = a.second - b.second;
@@ -147,14 +149,17 @@ public class JoinEstimation {
                 sel *= Math.pow(sortedJoinConditions.get(i).second, 1 / Math.pow(2, i));
             }
             outputRowCount = Math.max(1, crossJoinStats.getRowCount() * sel);
+            outputRowCount = outputRowCount * Math.pow(0.9, unTrustableCondition.size());
+            innerJoinStats = crossJoinStats.updateRowCountOnly(outputRowCount);
         } else {
             outputRowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount());
             Optional<Double> ratio = unTrustEqualRatio.stream().max(Double::compareTo);
             if (ratio.isPresent()) {
                 outputRowCount = Math.max(1, outputRowCount * ratio.get());
             }
+            innerJoinStats = crossJoinStats.updateRowCountOnly(outputRowCount);
         }
-        innerJoinStats = crossJoinStats.updateRowCountOnly(outputRowCount);
+
         if (!join.getOtherJoinConjuncts().isEmpty()) {
             FilterEstimation filterEstimation = new FilterEstimation();
             innerJoinStats = filterEstimation.estimate(
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out
index 2b16127efb..c887e96371 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out
@@ -7,30 +7,30 @@ CteAnchor[cteId= ( CTEId#0=] )
 --------PhysicalDistribute
 ----------hashAgg[LOCAL]
 ------------PhysicalProject
---------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
+--------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)
 ----------------PhysicalProject
-------------------PhysicalOlapScan[item]
-----------------PhysicalDistribute
-------------------PhysicalProject
---------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)
+------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
+--------------------hashJoin[INNER_JOIN](store.s_zip = customer_address.ca_zip)(customer.c_current_addr_sk = customer_address.ca_address_sk)( not (c_birth_country = upper(ca_country)))
 ----------------------PhysicalProject
-------------------------PhysicalOlapScan[store_returns]
-----------------------hashJoin[INNER_JOIN](store.s_zip = customer_address.ca_zip)(customer.c_current_addr_sk = customer_address.ca_address_sk)( not (c_birth_country = upper(ca_country)))
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN](store_sales.ss_customer_sk = customer.c_customer_sk)
-----------------------------hashJoin[INNER_JOIN](store_sales.ss_store_sk = store.s_store_sk)
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[store_sales]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((store.s_market_id = 8))
-------------------------------------PhysicalOlapScan[store]
+------------------------hashJoin[INNER_JOIN](store_sales.ss_customer_sk = customer.c_customer_sk)
+--------------------------hashJoin[INNER_JOIN](store_sales.ss_store_sk = store.s_store_sk)
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[store_sales]
 ----------------------------PhysicalDistribute
 ------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[customer]
-------------------------PhysicalDistribute
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[customer_address]
+--------------------------------filter((store.s_market_id = 8))
+----------------------------------PhysicalOlapScan[store]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[customer]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[customer_address]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[item]
+----------------PhysicalProject
+------------------PhysicalOlapScan[store_returns]
 --PhysicalQuickSort
 ----PhysicalDistribute
 ------PhysicalQuickSort
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query46.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query46.out
index c11c6f2c00..f86ecfbb99 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query46.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query46.out
@@ -6,13 +6,6 @@ PhysicalTopN
 ------PhysicalProject
 --------hashJoin[INNER_JOIN](dn.ss_customer_sk = customer.c_customer_sk)( not (ca_city = bought_city))
 ----------PhysicalDistribute
-------------hashJoin[INNER_JOIN](customer.c_current_addr_sk = current_addr.ca_address_sk)
---------------PhysicalProject
-----------------PhysicalOlapScan[customer_address]
---------------PhysicalDistribute
-----------------PhysicalProject
-------------------PhysicalOlapScan[customer]
-----------PhysicalDistribute
 ------------PhysicalProject
 --------------hashAgg[LOCAL]
 ----------------PhysicalProject
@@ -38,4 +31,11 @@ PhysicalTopN
 --------------------------------PhysicalOlapScan[store]
 --------------------PhysicalProject
 ----------------------PhysicalOlapScan[customer_address]
+----------PhysicalDistribute
+------------hashJoin[INNER_JOIN](customer.c_current_addr_sk = current_addr.ca_address_sk)
+--------------PhysicalProject
+----------------PhysicalOlapScan[customer_address]
+--------------PhysicalDistribute
+----------------PhysicalProject
+------------------PhysicalOlapScan[customer]
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
index 31f07b459f..0e04db4a1f 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
@@ -7,54 +7,55 @@ PhysicalTopN
 --------PhysicalDistribute
 ----------hashAgg[LOCAL]
 ------------PhysicalProject
---------------hashJoin[INNER_JOIN](item.i_item_sk = catalog_sales.cs_item_sk)
+--------------hashJoin[RIGHT_OUTER_JOIN](catalog_returns.cr_item_sk = catalog_sales.cs_item_sk)(catalog_returns.cr_order_number = catalog_sales.cs_order_number)
 ----------------PhysicalProject
-------------------PhysicalOlapScan[item]
+------------------PhysicalOlapScan[catalog_returns]
 ----------------PhysicalDistribute
 ------------------PhysicalProject
---------------------hashJoin[RIGHT_OUTER_JOIN](catalog_returns.cr_item_sk = catalog_sales.cs_item_sk)(catalog_returns.cr_order_number = catalog_sales.cs_order_number)
+--------------------hashJoin[LEFT_OUTER_JOIN](catalog_sales.cs_promo_sk = promotion.p_promo_sk)
 ----------------------PhysicalProject
-------------------------PhysicalOlapScan[catalog_returns]
-----------------------PhysicalDistribute
-------------------------PhysicalProject
---------------------------hashJoin[LEFT_OUTER_JOIN](catalog_sales.cs_promo_sk = promotion.p_promo_sk)
-----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN](warehouse.w_warehouse_sk = inventory.inv_warehouse_sk)
---------------------------------PhysicalProject
-----------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = inventory.inv_item_sk)(inventory.inv_date_sk = d2.d_date_sk)(inventory.inv_quantity_on_hand < catalog_sales.cs_quantity)
-------------------------------------PhysicalDistribute
---------------------------------------PhysicalOlapScan[inventory]
-------------------------------------PhysicalDistribute
---------------------------------------PhysicalProject
-----------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk)
-------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_hdemo_sk = household_demographics.hd_demo_sk)
---------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = d1.d_date_sk)(catalog_sales.cs_ship_date_sk = d3.d_date_sk)
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[catalog_sales]
-----------------------------------------------PhysicalDistribute
-------------------------------------------------hashJoin[INNER_JOIN](d1.d_week_seq = d2.d_week_seq)
---------------------------------------------------NestedLoopJoin[INNER_JOIN](d3.d_date > cast((cast(d_date as BIGINT) + 5) as DATEV2))
-----------------------------------------------------PhysicalProject
-------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------------------------PhysicalDistribute
-------------------------------------------------------PhysicalProject
---------------------------------------------------------filter((d1.d_year = 2002))
-----------------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------hashJoin[INNER_JOIN](warehouse.w_warehouse_sk = inventory.inv_warehouse_sk)
+--------------------------PhysicalProject
+----------------------------hashJoin[INNER_JOIN](item.i_item_sk = catalog_sales.cs_item_sk)
+------------------------------PhysicalProject
+--------------------------------hashJoin[INNER_JOIN](inventory.inv_date_sk = d2.d_date_sk)(d1.d_week_seq = d2.d_week_seq)
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = inventory.inv_item_sk)(inventory.inv_quantity_on_hand < catalog_sales.cs_quantity)
+--------------------------------------PhysicalDistribute
+----------------------------------------PhysicalOlapScan[inventory]
+--------------------------------------PhysicalDistribute
+----------------------------------------PhysicalProject
+------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_ship_date_sk = d3.d_date_sk)(d3.d_date > cast((cast(d_date as BIGINT) + 5) as DATEV2))
+--------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = d1.d_date_sk)
+----------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk = customer_demographics.cd_demo_sk)
+------------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_hdemo_sk = household_demographics.hd_demo_sk)
+--------------------------------------------------PhysicalProject
+----------------------------------------------------PhysicalOlapScan[catalog_sales]
 --------------------------------------------------PhysicalDistribute
 ----------------------------------------------------PhysicalProject
-------------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------------------filter((cast(hd_buy_potential as VARCHAR(*)) = '501-1000'))
+--------------------------------------------------------PhysicalOlapScan[household_demographics]
+------------------------------------------------PhysicalDistribute
+--------------------------------------------------PhysicalProject
+----------------------------------------------------filter((cast(cd_marital_status as VARCHAR(*)) = 'W'))
+------------------------------------------------------PhysicalOlapScan[customer_demographics]
+----------------------------------------------PhysicalDistribute
+------------------------------------------------PhysicalProject
+--------------------------------------------------filter((d1.d_year = 2002))
+----------------------------------------------------PhysicalOlapScan[date_dim]
 --------------------------------------------PhysicalDistribute
 ----------------------------------------------PhysicalProject
-------------------------------------------------filter((cast(hd_buy_potential as VARCHAR(*)) = '501-1000'))
---------------------------------------------------PhysicalOlapScan[household_demographics]
-------------------------------------------PhysicalDistribute
---------------------------------------------PhysicalProject
-----------------------------------------------filter((cast(cd_marital_status as VARCHAR(*)) = 'W'))
-------------------------------------------------PhysicalOlapScan[customer_demographics]
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[warehouse]
-----------------------------PhysicalDistribute
-------------------------------PhysicalProject
---------------------------------PhysicalOlapScan[promotion]
+------------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------PhysicalDistribute
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[date_dim]
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------PhysicalOlapScan[warehouse]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[promotion]
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query80.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query80.out
index be4f8708a0..29730c3a44 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query80.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query80.out
@@ -14,92 +14,89 @@ PhysicalTopN
 ----------------------PhysicalDistribute
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN](store_sales.ss_store_sk = store.s_store_sk)
+----------------------------hashJoin[RIGHT_OUTER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)
 ------------------------------PhysicalProject
---------------------------------hashJoin[RIGHT_OUTER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[store_returns]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN](store_sales.ss_promo_sk = promotion.p_promo_sk)
---------------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
-----------------------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
-------------------------------------------PhysicalProject
---------------------------------------------PhysicalOlapScan[store_sales]
-------------------------------------------PhysicalDistribute
---------------------------------------------PhysicalProject
-----------------------------------------------filter((date_dim.d_date >= 1998-08-28)(date_dim.d_date <= 1998-09-27))
-------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalOlapScan[store_returns]
+------------------------------PhysicalProject
+--------------------------------hashJoin[INNER_JOIN](store_sales.ss_store_sk = store.s_store_sk)
+----------------------------------hashJoin[INNER_JOIN](store_sales.ss_promo_sk = promotion.p_promo_sk)
+------------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = item.i_item_sk)
+--------------------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[store_sales]
 ----------------------------------------PhysicalDistribute
 ------------------------------------------PhysicalProject
---------------------------------------------filter((item.i_current_price > 50.00))
-----------------------------------------------PhysicalOlapScan[item]
+--------------------------------------------filter((date_dim.d_date >= 1998-08-28)(date_dim.d_date <= 1998-09-27))
+----------------------------------------------PhysicalOlapScan[date_dim]
 --------------------------------------PhysicalDistribute
 ----------------------------------------PhysicalProject
-------------------------------------------filter((cast(p_channel_tv as VARCHAR(*)) = 'N'))
---------------------------------------------PhysicalOlapScan[promotion]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[store]
+------------------------------------------filter((item.i_current_price > 50.00))
+--------------------------------------------PhysicalOlapScan[item]
+------------------------------------PhysicalDistribute
+--------------------------------------PhysicalProject
+----------------------------------------filter((cast(p_channel_tv as VARCHAR(*)) = 'N'))
+------------------------------------------PhysicalOlapScan[promotion]
+----------------------------------PhysicalDistribute
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[store]
 ------------------PhysicalProject
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN](catalog_sales.cs_catalog_page_sk = catalog_page.cp_catalog_page_sk)
+----------------------------hashJoin[RIGHT_OUTER_JOIN](catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)(catalog_sales.cs_order_number = catalog_returns.cr_order_number)
 ------------------------------PhysicalProject
---------------------------------hashJoin[RIGHT_OUTER_JOIN](catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)(catalog_sales.cs_order_number = catalog_returns.cr_order_number)
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[catalog_returns]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_promo_sk = promotion.p_promo_sk)
---------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
-----------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
-------------------------------------------PhysicalProject
---------------------------------------------PhysicalOlapScan[catalog_sales]
-------------------------------------------PhysicalDistribute
---------------------------------------------PhysicalProject
-----------------------------------------------filter((date_dim.d_date >= 1998-08-28)(date_dim.d_date <= 1998-09-27))
-------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalOlapScan[catalog_returns]
+------------------------------PhysicalProject
+--------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_catalog_page_sk = catalog_page.cp_catalog_page_sk)
+----------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_promo_sk = promotion.p_promo_sk)
+------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
+--------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[catalog_sales]
 ----------------------------------------PhysicalDistribute
 ------------------------------------------PhysicalProject
---------------------------------------------filter((item.i_current_price > 50.00))
-----------------------------------------------PhysicalOlapScan[item]
+--------------------------------------------filter((date_dim.d_date >= 1998-08-28)(date_dim.d_date <= 1998-09-27))
+----------------------------------------------PhysicalOlapScan[date_dim]
 --------------------------------------PhysicalDistribute
 ----------------------------------------PhysicalProject
-------------------------------------------filter((cast(p_channel_tv as VARCHAR(*)) = 'N'))
---------------------------------------------PhysicalOlapScan[promotion]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[catalog_page]
+------------------------------------------filter((item.i_current_price > 50.00))
+--------------------------------------------PhysicalOlapScan[item]
+------------------------------------PhysicalDistribute
+--------------------------------------PhysicalProject
+----------------------------------------filter((cast(p_channel_tv as VARCHAR(*)) = 'N'))
+------------------------------------------PhysicalOlapScan[promotion]
+----------------------------------PhysicalDistribute
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[catalog_page]
 ------------------PhysicalProject
 --------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute
 ------------------------hashAgg[LOCAL]
 --------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN](web_sales.ws_web_site_sk = web_site.web_site_sk)
+----------------------------hashJoin[RIGHT_OUTER_JOIN](web_sales.ws_item_sk = web_returns.wr_item_sk)(web_sales.ws_order_number = web_returns.wr_order_number)
 ------------------------------PhysicalProject
---------------------------------hashJoin[RIGHT_OUTER_JOIN](web_sales.ws_item_sk = web_returns.wr_item_sk)(web_sales.ws_order_number = web_returns.wr_order_number)
-----------------------------------PhysicalProject
-------------------------------------PhysicalOlapScan[web_returns]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN](web_sales.ws_promo_sk = promotion.p_promo_sk)
---------------------------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = item.i_item_sk)
-----------------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
-------------------------------------------PhysicalProject
---------------------------------------------PhysicalOlapScan[web_sales]
-------------------------------------------PhysicalDistribute
---------------------------------------------PhysicalProject
-----------------------------------------------filter((date_dim.d_date >= 1998-08-28)(date_dim.d_date <= 1998-09-27))
-------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalOlapScan[web_returns]
+------------------------------PhysicalProject
+--------------------------------hashJoin[INNER_JOIN](web_sales.ws_web_site_sk = web_site.web_site_sk)
+----------------------------------hashJoin[INNER_JOIN](web_sales.ws_promo_sk = promotion.p_promo_sk)
+------------------------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = item.i_item_sk)
+--------------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[web_sales]
 ----------------------------------------PhysicalDistribute
 ------------------------------------------PhysicalProject
---------------------------------------------filter((item.i_current_price > 50.00))
-----------------------------------------------PhysicalOlapScan[item]
+--------------------------------------------filter((date_dim.d_date >= 1998-08-28)(date_dim.d_date <= 1998-09-27))
+----------------------------------------------PhysicalOlapScan[date_dim]
 --------------------------------------PhysicalDistribute
 ----------------------------------------PhysicalProject
-------------------------------------------filter((cast(p_channel_tv as VARCHAR(*)) = 'N'))
---------------------------------------------PhysicalOlapScan[promotion]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_site]
+------------------------------------------filter((item.i_current_price > 50.00))
+--------------------------------------------PhysicalOlapScan[item]
+------------------------------------PhysicalDistribute
+--------------------------------------PhysicalProject
+----------------------------------------filter((cast(p_channel_tv as VARCHAR(*)) = 'N'))
+------------------------------------------PhysicalOlapScan[promotion]
+----------------------------------PhysicalDistribute
+------------------------------------PhysicalProject
+--------------------------------------PhysicalOlapScan[web_site]
 
diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q20-rewrite.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q20-rewrite.out
new file mode 100644
index 0000000000..92f1c8717b
--- /dev/null
+++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q20-rewrite.out
@@ -0,0 +1,32 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+PhysicalQuickSort
+--PhysicalDistribute
+----PhysicalQuickSort
+------PhysicalProject
+--------hashJoin[RIGHT_SEMI_JOIN](supplier.s_suppkey = t3.ps_suppkey)
+----------PhysicalDistribute
+------------PhysicalProject
+--------------hashJoin[INNER_JOIN](t2.l_partkey = t1.ps_partkey)(t2.l_suppkey = t1.ps_suppkey)(cast(ps_availqty as DECIMALV3(38, 3)) > t2.l_q)
+----------------PhysicalProject
+------------------hashAgg[GLOBAL]
+--------------------PhysicalDistribute
+----------------------hashAgg[LOCAL]
+------------------------PhysicalProject
+--------------------------filter((lineitem.l_shipdate < 1995-01-01)(lineitem.l_shipdate >= 1994-01-01))
+----------------------------PhysicalOlapScan[lineitem]
+----------------PhysicalDistribute
+------------------hashJoin[LEFT_SEMI_JOIN](partsupp.ps_partkey = part.p_partkey)
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[partsupp]
+--------------------PhysicalProject
+----------------------filter((p_name like 'forest%'))
+------------------------PhysicalOlapScan[part]
+----------hashJoin[INNER_JOIN](supplier.s_nationkey = nation.n_nationkey)
+------------PhysicalProject
+--------------PhysicalOlapScan[supplier]
+------------PhysicalDistribute
+--------------PhysicalProject
+----------------filter((nation.n_name = 'CANADA'))
+------------------PhysicalOlapScan[nation]
+
diff --git a/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q20-rewrite.groovy b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q20-rewrite.groovy
new file mode 100644
index 0000000000..8f93e6478c
--- /dev/null
+++ b/regression-test/suites/nereids_tpch_shape_sf1000_p0/shape/q20-rewrite.groovy
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+suite("q20-rewrite") {
+    String db = context.config.getDbNameByFile(new File(context.file.parent))
+    
+    sql "use ${db}"
+    sql 'set enable_nereids_planner=true'
+    sql 'set enable_fallback_to_original_planner=false'
+    sql "set runtime_filter_mode='GLOBAL'"
+
+    sql 'set exec_mem_limit=21G'
+    sql 'SET enable_pipeline_engine = true'
+
+
+
+    
+sql 'set be_number_for_test=3'
+    
+    qt_select """
+    explain shape plan
+select
+s_name, s_address 
+from
+supplier left semi join
+(
+    select * from
+    (
+        select l_partkey,l_suppkey, 0.5 * sum(l_quantity) as l_q
+        from lineitem
+        where l_shipdate >= date '1994-01-01'
+            and l_shipdate < date '1994-01-01' + interval '1' year
+        group by l_partkey,l_suppkey
+    ) t2 join
+    (
+        select ps_partkey, ps_suppkey, ps_availqty
+        from partsupp left semi join part
+        on ps_partkey = p_partkey and p_name like 'forest%'
+    ) t1
+    on t2.l_partkey = t1.ps_partkey and t2.l_suppkey = t1.ps_suppkey
+    and t1.ps_availqty > t2.l_q
+) t3
+on s_suppkey = t3.ps_suppkey
+join nation
+where s_nationkey = n_nationkey
+    and n_name = 'CANADA'
+order by s_name
+;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org