You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by en...@apache.org on 2023/06/30 02:00:37 UTC

[doris] branch master updated: [fix](nereids) nest loop join stats estimation (#21275)

This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9f44c2d80d [fix](nereids) nest loop join stats estimation (#21275)
9f44c2d80d is described below

commit 9f44c2d80dfeba048af7c6dca65bf2ad4b2b674d
Author: minghong <en...@gmail.com>
AuthorDate: Fri Jun 30 10:00:30 2023 +0800

    [fix](nereids) nest loop join stats estimation (#21275)
    
    1. fix bug in nest loop join estimation
    2. update column=column stats estimation
---
 .../doris/nereids/stats/FilterEstimation.java      | 19 +----
 .../apache/doris/nereids/stats/JoinEstimation.java | 85 +++++++++++++---------
 .../nereids_tpcds_shape_sf100_p0/shape/query11.out | 20 ++---
 .../nereids_tpcds_shape_sf100_p0/shape/query18.out | 59 +++++++--------
 .../nereids_tpcds_shape_sf100_p0/shape/query4.out  | 44 +++++------
 .../nereids_tpcds_shape_sf100_p0/shape/query74.out | 16 ++--
 .../nereids_tpch_shape_sf1000_p0/shape/q10.out     | 35 +++++----
 .../data/nereids_tpch_shape_sf1000_p0/shape/q5.out |  5 +-
 .../data/nereids_tpch_shape_sf500_p0/shape/q10.out | 35 +++++----
 .../data/nereids_tpch_shape_sf500_p0/shape/q5.out  |  5 +-
 10 files changed, 167 insertions(+), 156 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index acf072fb82..3ea7caaf08 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -390,24 +390,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
         rightBuilder.setNdv(rightIntersectLeft.getDistinctValues());
         rightBuilder.setMinValue(rightIntersectLeft.getLow());
         rightBuilder.setMaxValue(rightIntersectLeft.getDistinctValues());
-        double sel;
-        double reduceRatio = 0.25;
-        double bothSideReducedRatio = 0.9;
-        if (!leftStats.rangeChanged() && !rightStats.rangeChanged()
-                && leftStats.ndv < leftStats.getOriginalNdv() * bothSideReducedRatio
-                && rightStats.ndv < rightStats.getOriginalNdv() * bothSideReducedRatio) {
-            double sel1;
-            if (leftStats.ndv > rightStats.ndv) {
-                sel1 = 1 / StatsMathUtil.nonZeroDivisor(leftStats.ndv);
-            } else {
-                sel1 = 1 / StatsMathUtil.nonZeroDivisor(rightStats.ndv);
-            }
-            double sel2 = Math.min(rightStats.ndv / rightStats.getOriginalNdv(),
-                    leftStats.ndv / leftStats.getOriginalNdv());
-            sel = sel1 * Math.pow(sel2, reduceRatio);
-        } else {
-            sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
-        }
+        double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
         Statistics updatedStatistics = context.statistics.withSel(sel);
         updatedStatistics.addColumnStats(leftExpr, leftBuilder.build());
         updatedStatistics.addColumnStats(rightExpr, rightBuilder.build());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index 228b6936a7..afef2285e1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -72,16 +72,7 @@ public class JoinEstimation {
         return false;
     }
 
-    private static Statistics estimateInnerJoin(Statistics leftStats, Statistics rightStats, Join join) {
-        if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, join)) {
-            double rowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount());
-            rowCount = Math.max(1, rowCount);
-            return new StatisticsBuilder()
-                .setRowCount(rowCount)
-                .putColumnStatistics(leftStats.columnStatistics())
-                .putColumnStatistics(rightStats.columnStatistics())
-                .build();
-        }
+    private static Statistics estimateHashJoin(Statistics leftStats, Statistics rightStats, Join join) {
         /*
          * When we estimate filter A=B,
          * if any side of equation, A or B, is almost unique, the confidence level of estimation is high.
@@ -95,31 +86,31 @@ public class JoinEstimation {
         List<EqualTo> trustableConditions = join.getHashJoinConjuncts().stream()
                 .map(expression -> (EqualTo) expression)
                 .filter(
-                    expression -> {
-                        // since ndv is not accurate, if ndv/rowcount < almostUniqueThreshold,
-                        // this column is regarded as unique.
-                        double almostUniqueThreshold = 0.9;
-                        EqualTo equal = normalizeHashJoinCondition(expression, leftStats, rightStats);
-                        ColumnStatistic eqLeftColStats = ExpressionEstimation.estimate(equal.left(), leftStats);
-                        ColumnStatistic eqRightColStats = ExpressionEstimation.estimate(equal.right(), rightStats);
-                        double rightStatsRowCount = StatsMathUtil.nonZeroDivisor(rightStats.getRowCount());
-                        double leftStatsRowCount = StatsMathUtil.nonZeroDivisor(leftStats.getRowCount());
-                        boolean trustable = eqRightColStats.ndv / rightStatsRowCount > almostUniqueThreshold
-                                || eqLeftColStats.ndv / leftStatsRowCount > almostUniqueThreshold;
-                        if (!trustable) {
-                            double rNdv = StatsMathUtil.nonZeroDivisor(eqRightColStats.ndv);
-                            double lNdv = StatsMathUtil.nonZeroDivisor(eqLeftColStats.ndv);
-                            if (leftBigger) {
-                                unTrustEqualRatio.add((rightStatsRowCount / rNdv)
-                                        * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / lNdv);
-                            } else {
-                                unTrustEqualRatio.add((leftStatsRowCount / lNdv)
-                                        * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / rNdv);
+                        expression -> {
+                            // since ndv is not accurate, if ndv/rowcount < almostUniqueThreshold,
+                            // this column is regarded as unique.
+                            double almostUniqueThreshold = 0.9;
+                            EqualTo equal = normalizeHashJoinCondition(expression, leftStats, rightStats);
+                            ColumnStatistic eqLeftColStats = ExpressionEstimation.estimate(equal.left(), leftStats);
+                            ColumnStatistic eqRightColStats = ExpressionEstimation.estimate(equal.right(), rightStats);
+                            double rightStatsRowCount = StatsMathUtil.nonZeroDivisor(rightStats.getRowCount());
+                            double leftStatsRowCount = StatsMathUtil.nonZeroDivisor(leftStats.getRowCount());
+                            boolean trustable = eqRightColStats.ndv / rightStatsRowCount > almostUniqueThreshold
+                                    || eqLeftColStats.ndv / leftStatsRowCount > almostUniqueThreshold;
+                            if (!trustable) {
+                                double rNdv = StatsMathUtil.nonZeroDivisor(eqRightColStats.ndv);
+                                double lNdv = StatsMathUtil.nonZeroDivisor(eqLeftColStats.ndv);
+                                if (leftBigger) {
+                                    unTrustEqualRatio.add((rightStatsRowCount / rNdv)
+                                            * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / lNdv);
+                                } else {
+                                    unTrustEqualRatio.add((leftStatsRowCount / lNdv)
+                                            * Math.min(eqLeftColStats.ndv, eqRightColStats.ndv) / rNdv);
+                                }
+                                unTrustableCondition.add(equal);
                             }
-                            unTrustableCondition.add(equal);
+                            return trustable;
                         }
-                        return trustable;
-                    }
                 ).collect(Collectors.toList());
 
         Statistics innerJoinStats;
@@ -159,6 +150,34 @@ public class JoinEstimation {
             }
             innerJoinStats = crossJoinStats.updateRowCountOnly(outputRowCount);
         }
+        return innerJoinStats;
+    }
+
+    private static Statistics estimateNestLoopJoin(Statistics leftStats, Statistics rightStats, Join join) {
+        return new StatisticsBuilder()
+                .setRowCount(Math.max(1, leftStats.getRowCount() * rightStats.getRowCount()))
+                .putColumnStatistics(leftStats.columnStatistics())
+                .putColumnStatistics(rightStats.columnStatistics())
+                .build();
+    }
+
+    private static Statistics estimateInnerJoin(Statistics leftStats, Statistics rightStats, Join join) {
+        if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, join)) {
+            double rowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount());
+            rowCount = Math.max(1, rowCount);
+            return new StatisticsBuilder()
+                .setRowCount(rowCount)
+                .putColumnStatistics(leftStats.columnStatistics())
+                .putColumnStatistics(rightStats.columnStatistics())
+                .build();
+        }
+
+        Statistics innerJoinStats;
+        if (join.getHashJoinConjuncts().isEmpty()) {
+            innerJoinStats = estimateNestLoopJoin(leftStats, rightStats, join);
+        } else {
+            innerJoinStats = estimateHashJoin(leftStats, rightStats, join);
+        }
 
         if (!join.getOtherJoinConjuncts().isEmpty()) {
             FilterEstimation filterEstimation = new FilterEstimation();
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out
index 42fdc0d063..dee0257716 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out
@@ -44,24 +44,24 @@ CteAnchor[cteId= ( CTEId#4=] )
 ----PhysicalDistribute
 ------PhysicalTopN
 --------PhysicalProject
-----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)(CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END > CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END)
+----------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)(CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END > CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END)
 ------------PhysicalDistribute
 --------------PhysicalProject
-----------------filter((t_w_secyear.dyear = 2002)(t_w_secyear.sale_type = 'w'))
+----------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2002))
 ------------------CteConsumer[cteId= ( CTEId#4=] )
 ------------PhysicalProject
---------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)
-----------------PhysicalDistribute
-------------------PhysicalProject
---------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2002))
-----------------------CteConsumer[cteId= ( CTEId#4=] )
-----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)
+--------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)
+----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)
 ------------------PhysicalDistribute
 --------------------PhysicalProject
-----------------------filter((t_s_firstyear.dyear = 2001)(t_s_firstyear.sale_type = 's')(t_s_firstyear.year_total > 0.00))
+----------------------filter((t_w_secyear.dyear = 2002)(t_w_secyear.sale_type = 'w'))
 ------------------------CteConsumer[cteId= ( CTEId#4=] )
 ------------------PhysicalDistribute
 --------------------PhysicalProject
-----------------------filter((t_w_firstyear.year_total > 0.00)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.dyear = 2001))
+----------------------filter((t_s_firstyear.dyear = 2001)(t_s_firstyear.sale_type = 's')(t_s_firstyear.year_total > 0.00))
 ------------------------CteConsumer[cteId= ( CTEId#4=] )
+----------------PhysicalDistribute
+------------------PhysicalProject
+--------------------filter((t_w_firstyear.year_total > 0.00)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.dyear = 2001))
+----------------------CteConsumer[cteId= ( CTEId#4=] )
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query18.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query18.out
index e2953ab328..6ddc252783 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query18.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query18.out
@@ -9,40 +9,41 @@ PhysicalTopN
 ------------hashAgg[LOCAL]
 --------------PhysicalRepeat
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
---------------------PhysicalProject
-----------------------PhysicalOlapScan[item]
+------------------hashJoin[INNER_JOIN](customer.c_current_cdemo_sk = cd2.cd_demo_sk)
 --------------------PhysicalDistribute
 ----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN](customer.c_current_cdemo_sk = cd2.cd_demo_sk)
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[customer_demographics]
+------------------------PhysicalOlapScan[customer_demographics]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = item.i_item_sk)
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[item]
 --------------------------PhysicalDistribute
 ----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
---------------------------------PhysicalProject
-----------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk)
-------------------------------------PhysicalProject
---------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)
-----------------------------------------PhysicalProject
-------------------------------------------PhysicalOlapScan[catalog_sales]
-----------------------------------------PhysicalDistribute
-------------------------------------------hashJoin[INNER_JOIN](customer.c_current_addr_sk = customer_address.ca_address_sk)
---------------------------------------------PhysicalDistribute
-----------------------------------------------PhysicalProject
-------------------------------------------------filter(c_birth_month IN (1, 2, 4, 7, 8, 10))
---------------------------------------------------PhysicalOlapScan[customer]
---------------------------------------------PhysicalDistribute
+------------------------------hashJoin[INNER_JOIN](customer.c_current_addr_sk = customer_address.ca_address_sk)
+--------------------------------PhysicalDistribute
+----------------------------------PhysicalProject
+------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)
+--------------------------------------PhysicalDistribute
+----------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
+------------------------------------------PhysicalProject
+--------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk = cd1.cd_demo_sk)
 ----------------------------------------------PhysicalProject
-------------------------------------------------filter(ca_state IN ('WA', 'GA', 'NC', 'ME', 'WY', 'OK', 'IN'))
---------------------------------------------------PhysicalOlapScan[customer_address]
-------------------------------------PhysicalDistribute
---------------------------------------PhysicalProject
-----------------------------------------filter((cast(cd_gender as VARCHAR(*)) = 'F')(cast(cd_education_status as VARCHAR(*)) = 'Advanced Degree'))
-------------------------------------------PhysicalOlapScan[customer_demographics]
+------------------------------------------------PhysicalOlapScan[catalog_sales]
+----------------------------------------------PhysicalDistribute
+------------------------------------------------PhysicalProject
+--------------------------------------------------filter((cast(cd_gender as VARCHAR(*)) = 'F')(cast(cd_education_status as VARCHAR(*)) = 'Advanced Degree'))
+----------------------------------------------------PhysicalOlapScan[customer_demographics]
+------------------------------------------PhysicalDistribute
+--------------------------------------------PhysicalProject
+----------------------------------------------filter((date_dim.d_year = 1998))
+------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------------PhysicalDistribute
+----------------------------------------PhysicalProject
+------------------------------------------filter(c_birth_month IN (1, 2, 4, 7, 8, 10))
+--------------------------------------------PhysicalOlapScan[customer]
 --------------------------------PhysicalDistribute
 ----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_year = 1998))
---------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------filter(ca_state IN ('WA', 'GA', 'NC', 'ME', 'WY', 'OK', 'IN'))
+--------------------------------------PhysicalOlapScan[customer_address]
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out
index c4b812b0b0..b1ba26b82e 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out
@@ -65,35 +65,35 @@ CteAnchor[cteId= ( CTEId#6=] )
 ------PhysicalTopN
 --------PhysicalProject
 ----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END)
-------------PhysicalProject
---------------filter((t_w_secyear.sale_type = 'w')(t_w_secyear.dyear = 2000))
-----------------CteConsumer[cteId= ( CTEId#6=] )
 ------------PhysicalDistribute
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)
+----------------filter((t_w_secyear.sale_type = 'w')(t_w_secyear.dyear = 2000))
+------------------CteConsumer[cteId= ( CTEId#6=] )
+------------PhysicalProject
+--------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)
+----------------PhysicalDistribute
 ------------------PhysicalProject
 --------------------filter((t_w_firstyear.dyear = 1999)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.year_total > 0.000000))
 ----------------------CteConsumer[cteId= ( CTEId#6=] )
-------------------PhysicalDistribute
+----------------PhysicalProject
+------------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END)
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2000))
+--------------------------CteConsumer[cteId= ( CTEId#6=] )
 --------------------PhysicalProject
-----------------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END)
+----------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_secyear.customer_id)
 ------------------------PhysicalDistribute
 --------------------------PhysicalProject
-----------------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2000))
+----------------------------filter((t_c_secyear.sale_type = 'c')(t_c_secyear.dyear = 2000))
 ------------------------------CteConsumer[cteId= ( CTEId#6=] )
-------------------------PhysicalProject
---------------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_secyear.customer_id)
-----------------------------PhysicalDistribute
-------------------------------PhysicalProject
---------------------------------filter((t_c_secyear.sale_type = 'c')(t_c_secyear.dyear = 2000))
-----------------------------------CteConsumer[cteId= ( CTEId#6=] )
-----------------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_firstyear.customer_id)
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((t_s_firstyear.year_total > 0.000000)(t_s_firstyear.dyear = 1999)(t_s_firstyear.sale_type = 's'))
-------------------------------------CteConsumer[cteId= ( CTEId#6=] )
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((t_c_firstyear.year_total > 0.000000)(t_c_firstyear.dyear = 1999)(t_c_firstyear.sale_type = 'c'))
-------------------------------------CteConsumer[cteId= ( CTEId#6=] )
+------------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_firstyear.customer_id)
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((t_s_firstyear.year_total > 0.000000)(t_s_firstyear.dyear = 1999)(t_s_firstyear.sale_type = 's'))
+--------------------------------CteConsumer[cteId= ( CTEId#6=] )
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((t_c_firstyear.year_total > 0.000000)(t_c_firstyear.dyear = 1999)(t_c_firstyear.sale_type = 'c'))
+--------------------------------CteConsumer[cteId= ( CTEId#6=] )
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out
index c1f925d45b..07a0ec06da 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out
@@ -45,15 +45,7 @@ CteAnchor[cteId= ( CTEId#4=] )
 ------PhysicalTopN
 --------PhysicalProject
 ----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)(CASE WHEN (year_total > 0.0) THEN (year_total / year_total) ELSE NULL END > CASE WHEN (year_total > 0.0) THEN (year_total / year_total) ELSE NULL END)
-------------PhysicalDistribute
---------------PhysicalProject
-----------------filter((t_w_firstyear.year = 1999)(t_w_firstyear.year_total > 0.0)(t_w_firstyear.sale_type = 'w'))
-------------------CteConsumer[cteId= ( CTEId#4=] )
 ------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)
---------------PhysicalDistribute
-----------------PhysicalProject
-------------------filter((t_w_secyear.year = 2000)(t_w_secyear.sale_type = 'w'))
---------------------CteConsumer[cteId= ( CTEId#4=] )
 --------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)
 ----------------PhysicalDistribute
 ------------------PhysicalProject
@@ -63,4 +55,12 @@ CteAnchor[cteId= ( CTEId#4=] )
 ------------------PhysicalProject
 --------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.year = 2000))
 ----------------------CteConsumer[cteId= ( CTEId#4=] )
+--------------PhysicalDistribute
+----------------PhysicalProject
+------------------filter((t_w_secyear.year = 2000)(t_w_secyear.sale_type = 'w'))
+--------------------CteConsumer[cteId= ( CTEId#4=] )
+------------PhysicalDistribute
+--------------PhysicalProject
+----------------filter((t_w_firstyear.year = 1999)(t_w_firstyear.year_total > 0.0)(t_w_firstyear.sale_type = 'w'))
+------------------CteConsumer[cteId= ( CTEId#4=] )
 
diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q10.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q10.out
index c1d1c3548a..404026aa64 100644
--- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q10.out
+++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q10.out
@@ -4,22 +4,25 @@ PhysicalTopN
 --PhysicalDistribute
 ----PhysicalTopN
 ------PhysicalProject
---------hashAgg[LOCAL]
-----------PhysicalProject
-------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey)
---------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)
-----------------PhysicalProject
-------------------PhysicalOlapScan[customer]
-----------------PhysicalDistribute
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute
+------------hashAgg[LOCAL]
+--------------PhysicalProject
+----------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
+--------------------filter((lineitem.l_returnflag = 'R'))
+----------------------PhysicalOlapScan[lineitem]
+------------------PhysicalDistribute
+--------------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey)
 ----------------------PhysicalProject
-------------------------filter((lineitem.l_returnflag = 'R'))
---------------------------PhysicalOlapScan[lineitem]
-----------------------PhysicalProject
-------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01))
---------------------------PhysicalOlapScan[orders]
---------------PhysicalDistribute
-----------------PhysicalProject
-------------------PhysicalOlapScan[nation]
+------------------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[customer]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01))
+--------------------------------PhysicalOlapScan[orders]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[nation]
 
diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q5.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q5.out
index f8211c8fa8..6d45aeda6b 100644
--- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q5.out
+++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q5.out
@@ -8,8 +8,6 @@ PhysicalQuickSort
 ----------hashAgg[LOCAL]
 ------------PhysicalProject
 --------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)(customer.c_nationkey = supplier.s_nationkey)
-----------------PhysicalProject
-------------------PhysicalOlapScan[customer]
 ----------------PhysicalDistribute
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
@@ -32,4 +30,7 @@ PhysicalQuickSort
 ----------------------PhysicalProject
 ------------------------filter((orders.o_orderdate < 1995-01-01)(orders.o_orderdate >= 1994-01-01))
 --------------------------PhysicalOlapScan[orders]
+----------------PhysicalDistribute
+------------------PhysicalProject
+--------------------PhysicalOlapScan[customer]
 
diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out
index c1d1c3548a..404026aa64 100644
--- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out
+++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q10.out
@@ -4,22 +4,25 @@ PhysicalTopN
 --PhysicalDistribute
 ----PhysicalTopN
 ------PhysicalProject
---------hashAgg[LOCAL]
-----------PhysicalProject
-------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey)
---------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)
-----------------PhysicalProject
-------------------PhysicalOlapScan[customer]
-----------------PhysicalDistribute
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute
+------------hashAgg[LOCAL]
+--------------PhysicalProject
+----------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
+--------------------filter((lineitem.l_returnflag = 'R'))
+----------------------PhysicalOlapScan[lineitem]
+------------------PhysicalDistribute
+--------------------hashJoin[INNER_JOIN](customer.c_nationkey = nation.n_nationkey)
 ----------------------PhysicalProject
-------------------------filter((lineitem.l_returnflag = 'R'))
---------------------------PhysicalOlapScan[lineitem]
-----------------------PhysicalProject
-------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01))
---------------------------PhysicalOlapScan[orders]
---------------PhysicalDistribute
-----------------PhysicalProject
-------------------PhysicalOlapScan[nation]
+------------------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[customer]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((orders.o_orderdate < 1994-01-01)(orders.o_orderdate >= 1993-10-01))
+--------------------------------PhysicalOlapScan[orders]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[nation]
 
diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out
index f8211c8fa8..6d45aeda6b 100644
--- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out
+++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q5.out
@@ -8,8 +8,6 @@ PhysicalQuickSort
 ----------hashAgg[LOCAL]
 ------------PhysicalProject
 --------------hashJoin[INNER_JOIN](customer.c_custkey = orders.o_custkey)(customer.c_nationkey = supplier.s_nationkey)
-----------------PhysicalProject
-------------------PhysicalOlapScan[customer]
 ----------------PhysicalDistribute
 ------------------PhysicalProject
 --------------------hashJoin[INNER_JOIN](lineitem.l_orderkey = orders.o_orderkey)
@@ -32,4 +30,7 @@ PhysicalQuickSort
 ----------------------PhysicalProject
 ------------------------filter((orders.o_orderdate < 1995-01-01)(orders.o_orderdate >= 1994-01-01))
 --------------------------PhysicalOlapScan[orders]
+----------------PhysicalDistribute
+------------------PhysicalProject
+--------------------PhysicalOlapScan[customer]
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org