You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by en...@apache.org on 2023/06/25 13:47:38 UTC

[doris] branch master updated: [refactor](nereids) update Agg stats derive method #21036

This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2d1163c4d8 [refactor](nereids) update Agg stats derive method #21036
2d1163c4d8 is described below

commit 2d1163c4d8e051f822aa9c63affd8a360631f075
Author: minghong <en...@gmail.com>
AuthorDate: Sun Jun 25 21:47:32 2023 +0800

    [refactor](nereids) update Agg stats derive method #21036
    
    This pr has no effect on tpch queries.
    Some tpcds queries are impacted.
    They are 4/11/23/24/47/51/57/65/74, in which 4 and 51 are improved
---
 .../org/apache/doris/nereids/cost/CostModelV1.java | 15 +++--
 .../apache/doris/nereids/memo/GroupExpression.java |  8 +--
 .../doris/nereids/stats/StatsCalculator.java       | 74 ++++++++++++++--------
 .../nereids/jobs/cascades/DeriveStatsJobTest.java  |  2 +-
 .../nereids_tpcds_shape_sf100_p0/shape/query11.out | 26 ++++----
 .../nereids_tpcds_shape_sf100_p0/shape/query23.out | 52 ++++++++-------
 .../nereids_tpcds_shape_sf100_p0/shape/query24.out | 23 ++++---
 .../nereids_tpcds_shape_sf100_p0/shape/query4.out  | 49 +++++++-------
 .../nereids_tpcds_shape_sf100_p0/shape/query47.out | 15 +++--
 .../nereids_tpcds_shape_sf100_p0/shape/query51.out | 12 ++--
 .../nereids_tpcds_shape_sf100_p0/shape/query57.out | 15 +++--
 .../nereids_tpcds_shape_sf100_p0/shape/query65.out |  4 +-
 .../nereids_tpcds_shape_sf100_p0/shape/query74.out | 35 +++++-----
 13 files changed, 181 insertions(+), 149 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
index c59b32175c..713cbf0e00 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
@@ -65,6 +65,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
     // the penalty factor is no more than BROADCAST_JOIN_SKEW_PENALTY_LIMIT
     static final double BROADCAST_JOIN_SKEW_RATIO = 30.0;
     static final double BROADCAST_JOIN_SKEW_PENALTY_LIMIT = 2.0;
+    private int beNumber = Math.max(1, ConnectContext.get().getEnv().getClusterInfo().getBackendsNumber(true));
 
     public static Cost addChildCost(Plan plan, Cost planCost, Cost childCost, int index) {
         Preconditions.checkArgument(childCost instanceof CostV1 && planCost instanceof CostV1);
@@ -171,8 +172,6 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
         Statistics childStatistics = context.getChildStatistics(0);
         double intputRowCount = childStatistics.getRowCount();
         DistributionSpec spec = distribute.getDistributionSpec();
-        int beNumber = ConnectContext.get().getEnv().getClusterInfo().getBackendsNumber(true);
-        beNumber = Math.max(1, beNumber);
 
         // shuffle
         if (spec instanceof DistributionSpecHash) {
@@ -221,11 +220,15 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
     @Override
     public Cost visitPhysicalHashAggregate(
             PhysicalHashAggregate<? extends Plan> aggregate, PlanContext context) {
-        // TODO: stage.....
-
-        Statistics statistics = context.getStatisticsWithCheck();
         Statistics inputStatistics = context.getChildStatistics(0);
-        return CostV1.of(inputStatistics.getRowCount(), statistics.getRowCount(), 0);
+        if (aggregate.getAggPhase().isLocal()) {
+            return CostV1.of(inputStatistics.getRowCount() / beNumber,
+                    inputStatistics.getRowCount() / beNumber, 0);
+        } else {
+            // global
+            return CostV1.of(inputStatistics.getRowCount(),
+                    inputStatistics.getRowCount(), 0);
+        }
     }
 
     private double broadCastJoinBalancePenalty(Statistics probeStats, Statistics buildStats) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
index 1757cd7dac..008ed0e817 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
@@ -325,10 +325,10 @@ public class GroupExpression {
         }
         builder.append(" cost=").append(format.format((long) cost));
         builder.append(" estRows=").append(format.format(estOutputRowCount));
-        builder.append(" (plan=").append(plan.toString()).append(") children=[");
-        builder.append(Joiner.on(", ").join(
-                children.stream().map(Group::getGroupId).collect(Collectors.toList())));
-        builder.append("]");
+        builder.append(" children=[").append(Joiner.on(", ").join(
+                        children.stream().map(Group::getGroupId).collect(Collectors.toList())))
+                .append(" ]");
+        builder.append(" (plan=").append(plan.toString()).append(")");
         return builder.toString();
     }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 923eb68c7d..38856f44ba 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -139,6 +139,9 @@ import java.util.stream.Collectors;
  */
 public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
     public static double DEFAULT_AGGREGATE_RATIO = 0.5;
+    public static double DEFAULT_AGGREGATE_EXPAND_RATIO = 1.05;
+
+    public static double AGGREGATE_COLUMN_CORRELATION_COEFFICIENT = 0.75;
     public static double DEFAULT_COLUMN_NDV_RATIO = 0.5;
 
     private static final Logger LOG = LogManager.getLogger(StatsCalculator.class);
@@ -664,45 +667,66 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
         return stats.withRowCount(Math.min(stats.getRowCount(), limit.getLimit()));
     }
 
-    private Statistics computeAggregate(Aggregate<? extends Plan> aggregate) {
-        // TODO: since we have no column stats here. just use a fix ratio to compute the row count.
-        List<Expression> groupByExpressions = aggregate.getGroupByExpressions();
-        Statistics childStats = groupExpression.childStatistics(0);
-        double resultSetCount = 1;
-        if (!groupByExpressions.isEmpty()) {
-            Map<Expression, ColumnStatistic> childSlotToColumnStats = childStats.columnStatistics();
-            double inputRowCount = childStats.getRowCount();
-            if (inputRowCount != 0) {
-                List<ColumnStatistic> groupByKeyStats = groupByExpressions.stream()
-                        .filter(childSlotToColumnStats::containsKey)
-                        .map(childSlotToColumnStats::get)
-                        .filter(s -> !s.isUnKnown)
-                        .collect(Collectors.toList());
-                if (groupByKeyStats.isEmpty()) {
-                    //all column stats are unknown, use default ratio
-                    resultSetCount = inputRowCount * DEFAULT_AGGREGATE_RATIO;
-                } else {
-                    resultSetCount = groupByKeyStats.stream().map(s -> s.ndv)
-                            .max(Double::compare).get();
+    private double estimateGroupByRowCount(List<Expression> groupByExpressions, Statistics childStats) {
+        double rowCount = 1;
+        Map<Expression, ColumnStatistic> groupByColStats = new HashMap<>();
+        for (Expression groupByExpr : groupByExpressions) {
+            ColumnStatistic colStats = childStats.findColumnStatistics(groupByExpr);
+            if (colStats == null) {
+                colStats = ExpressionEstimation.estimate(groupByExpr, childStats);
+            }
+            groupByColStats.put(groupByExpr, colStats);
+        }
+        int groupByCount = groupByExpressions.size();
+        if (groupByColStats.values().stream().anyMatch(ColumnStatistic::isUnKnown)) {
+            if (groupByCount > 0) {
+                rowCount *= DEFAULT_AGGREGATE_RATIO * Math.pow(DEFAULT_AGGREGATE_EXPAND_RATIO, groupByCount - 1);
+            }
+            if (rowCount > childStats.getRowCount()) {
+                rowCount = childStats.getRowCount();
+            }
+        } else {
+            if (groupByCount > 0) {
+                List<Double> groupByNdvs = groupByColStats.values().stream()
+                        .map(colStats -> colStats.ndv)
+                        .sorted().collect(Collectors.toList());
+                rowCount = groupByNdvs.get(0);
+                for (int groupByIndex = 1; groupByIndex < groupByCount; ++groupByIndex) {
+                    rowCount *= Math.max(1, groupByNdvs.get(groupByIndex) * Math.pow(
+                            AGGREGATE_COLUMN_CORRELATION_COEFFICIENT, groupByIndex + 1D));
+                    if (rowCount > childStats.getRowCount()) {
+                        rowCount = childStats.getRowCount();
+                        break;
+                    }
                 }
             }
         }
-        resultSetCount = Math.min(resultSetCount, childStats.getRowCount());
+        rowCount = Math.max(1, rowCount);
+        rowCount = Math.min(rowCount, childStats.getRowCount());
+        return rowCount;
+    }
+
+    private Statistics computeAggregate(Aggregate<? extends Plan> aggregate) {
+        List<Expression> groupByExpressions = aggregate.getGroupByExpressions();
+        Statistics childStats = groupExpression.childStatistics(0);
+        double rowCount = estimateGroupByRowCount(groupByExpressions, childStats);
         Map<Expression, ColumnStatistic> slotToColumnStats = Maps.newHashMap();
         List<NamedExpression> outputExpressions = aggregate.getOutputExpressions();
         // TODO: 1. Estimate the output unit size by the type of corresponding AggregateFunction
         //       2. Handle alias, literal in the output expression list
-        double factor = childStats.getRowCount() / resultSetCount;
+        double factor = childStats.getRowCount() / rowCount;
         for (NamedExpression outputExpression : outputExpressions) {
             ColumnStatistic columnStat = ExpressionEstimation.estimate(outputExpression, childStats);
             ColumnStatisticBuilder builder = new ColumnStatisticBuilder(columnStat);
             builder.setMinValue(columnStat.minValue / factor);
             builder.setMaxValue(columnStat.maxValue / factor);
-            builder.setNdv(resultSetCount);
-            builder.setDataSize(resultSetCount * outputExpression.getDataType().width());
+            if (columnStat.ndv > rowCount) {
+                builder.setNdv(rowCount);
+            }
+            builder.setDataSize(rowCount * outputExpression.getDataType().width());
             slotToColumnStats.put(outputExpression.toSlot(), columnStat);
         }
-        return new Statistics(resultSetCount, slotToColumnStats, childStats.getWidth(),
+        return new Statistics(rowCount, slotToColumnStats, childStats.getWidth(),
                 childStats.getPenalty() + childStats.getRowCount());
         // TODO: Update ColumnStats properly, add new mapping from output slot to ColumnStats
     }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java
index 0bc2b139d8..3fff39d2de 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java
@@ -67,7 +67,7 @@ public class DeriveStatsJobTest {
         }
         Statistics statistics = cascadesContext.getMemo().getRoot().getStatistics();
         Assertions.assertNotNull(statistics);
-        Assertions.assertTrue(Precision.equals(0.5, statistics.getRowCount(), 0.1));
+        Assertions.assertTrue(Precision.equals(1, statistics.getRowCount(), 0.1));
     }
 
     private LogicalOlapScan constructOlapSCan() {
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out
index f1fd504809..a25aced303 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query11.out
@@ -46,23 +46,23 @@ CteAnchor[cteId= ( CTEId#4=] )
 ------PhysicalTopN
 --------PhysicalProject
 ----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)(CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END > CASE WHEN (year_total > 0.00) THEN (cast(year_total as DECIMALV3(38, 8)) / year_total) ELSE 0.000000 END)
-------------PhysicalProject
---------------filter((t_w_secyear.dyear = 2002)(t_w_secyear.sale_type = 'w'))
-----------------CteConsumer[cteId= ( CTEId#4=] )
 ------------PhysicalDistribute
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)
+----------------filter((t_w_secyear.dyear = 2002)(t_w_secyear.sale_type = 'w'))
+------------------CteConsumer[cteId= ( CTEId#4=] )
+------------PhysicalProject
+--------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)
+----------------PhysicalDistribute
 ------------------PhysicalProject
 --------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2002))
 ----------------------CteConsumer[cteId= ( CTEId#4=] )
+----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)
 ------------------PhysicalDistribute
---------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)
-----------------------PhysicalDistribute
-------------------------PhysicalProject
---------------------------filter((t_s_firstyear.dyear = 2001)(t_s_firstyear.sale_type = 's')(t_s_firstyear.year_total > 0.00))
-----------------------------CteConsumer[cteId= ( CTEId#4=] )
-----------------------PhysicalDistribute
-------------------------PhysicalProject
---------------------------filter((t_w_firstyear.year_total > 0.00)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.dyear = 2001))
-----------------------------CteConsumer[cteId= ( CTEId#4=] )
+--------------------PhysicalProject
+----------------------filter((t_s_firstyear.dyear = 2001)(t_s_firstyear.sale_type = 's')(t_s_firstyear.year_total > 0.00))
+------------------------CteConsumer[cteId= ( CTEId#4=] )
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------filter((t_w_firstyear.year_total > 0.00)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.dyear = 2001))
+------------------------CteConsumer[cteId= ( CTEId#4=] )
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out
index 5e59776f67..9f4a6bcf94 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out
@@ -62,37 +62,41 @@ CteAnchor[cteId= ( CTEId#1=] )
 ------------hashAgg[LOCAL]
 --------------PhysicalUnion
 ----------------PhysicalProject
-------------------hashJoin[LEFT_SEMI_JOIN](catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)
+------------------hashJoin[RIGHT_SEMI_JOIN](catalog_sales.cs_item_sk = frequent_ss_items.item_sk)
 --------------------PhysicalDistribute
-----------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
-------------------------hashJoin[LEFT_SEMI_JOIN](catalog_sales.cs_item_sk = frequent_ss_items.item_sk)
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_sales]
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------CteConsumer[cteId= ( CTEId#1=] )
-------------------------PhysicalDistribute
---------------------------PhysicalProject
-----------------------------filter((date_dim.d_year = 2000)(date_dim.d_moy = 5))
-------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalProject
+------------------------CteConsumer[cteId= ( CTEId#1=] )
 --------------------PhysicalDistribute
 ----------------------PhysicalProject
-------------------------CteConsumer[cteId= ( CTEId#4=] )
+------------------------hashJoin[LEFT_SEMI_JOIN](catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)
+--------------------------PhysicalDistribute
+----------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_sales]
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((date_dim.d_year = 2000)(date_dim.d_moy = 5))
+------------------------------------PhysicalOlapScan[date_dim]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------CteConsumer[cteId= ( CTEId#4=] )
 ----------------PhysicalProject
-------------------hashJoin[RIGHT_SEMI_JOIN](web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)
+------------------hashJoin[RIGHT_SEMI_JOIN](web_sales.ws_item_sk = frequent_ss_items.item_sk)
 --------------------PhysicalDistribute
 ----------------------PhysicalProject
-------------------------CteConsumer[cteId= ( CTEId#4=] )
+------------------------CteConsumer[cteId= ( CTEId#1=] )
 --------------------PhysicalDistribute
-----------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
-------------------------hashJoin[LEFT_SEMI_JOIN](web_sales.ws_item_sk = frequent_ss_items.item_sk)
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[web_sales]
+----------------------PhysicalProject
+------------------------hashJoin[LEFT_SEMI_JOIN](web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)
+--------------------------PhysicalDistribute
+----------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[web_sales]
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((date_dim.d_year = 2000)(date_dim.d_moy = 5))
+------------------------------------PhysicalOlapScan[date_dim]
 --------------------------PhysicalDistribute
 ----------------------------PhysicalProject
-------------------------------CteConsumer[cteId= ( CTEId#1=] )
-------------------------PhysicalDistribute
---------------------------PhysicalProject
-----------------------------filter((date_dim.d_year = 2000)(date_dim.d_moy = 5))
-------------------------------PhysicalOlapScan[date_dim]
+------------------------------CteConsumer[cteId= ( CTEId#4=] )
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out
index c887e96371..70f424e95c 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out
@@ -32,22 +32,21 @@ CteAnchor[cteId= ( CTEId#0=] )
 ----------------PhysicalProject
 ------------------PhysicalOlapScan[store_returns]
 --PhysicalQuickSort
-----PhysicalDistribute
-------PhysicalQuickSort
---------PhysicalProject
-----------NestedLoopJoin[INNER_JOIN](cast(paid as DOUBLE) > cast((0.05 * avg(netpaid)) as DOUBLE))
+----PhysicalQuickSort
+------PhysicalProject
+--------NestedLoopJoin[INNER_JOIN](cast(paid as DOUBLE) > cast((0.05 * avg(netpaid)) as DOUBLE))
+----------PhysicalAssertNumRows
+------------PhysicalProject
+--------------hashAgg[GLOBAL]
+----------------PhysicalDistribute
+------------------hashAgg[LOCAL]
+--------------------PhysicalProject
+----------------------CteConsumer[cteId= ( CTEId#0=] )
+----------PhysicalDistribute
 ------------hashAgg[GLOBAL]
 --------------PhysicalDistribute
 ----------------hashAgg[LOCAL]
 ------------------PhysicalProject
 --------------------filter((cast(i_color as VARCHAR(*)) = 'beige'))
 ----------------------CteConsumer[cteId= ( CTEId#0=] )
-------------PhysicalDistribute
---------------PhysicalAssertNumRows
-----------------PhysicalProject
-------------------hashAgg[GLOBAL]
---------------------PhysicalDistribute
-----------------------hashAgg[LOCAL]
-------------------------PhysicalProject
---------------------------CteConsumer[cteId= ( CTEId#0=] )
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out
index 58576e4d74..5db6313ba2 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out
@@ -65,35 +65,36 @@ CteAnchor[cteId= ( CTEId#6=] )
 ----PhysicalDistribute
 ------PhysicalTopN
 --------PhysicalProject
-----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END)
+----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END)
 ------------PhysicalProject
---------------filter((t_w_firstyear.dyear = 1999)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.year_total > 0.000000))
+--------------filter((t_w_secyear.sale_type = 'w')(t_w_secyear.dyear = 2000))
 ----------------CteConsumer[cteId= ( CTEId#6=] )
 ------------PhysicalDistribute
---------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)
-----------------PhysicalProject
-------------------filter((t_w_secyear.sale_type = 'w')(t_w_secyear.dyear = 2000))
---------------------CteConsumer[cteId= ( CTEId#6=] )
-----------------PhysicalDistribute
+--------------PhysicalProject
+----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)
 ------------------PhysicalProject
---------------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END)
-----------------------PhysicalProject
-------------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2000))
---------------------------CteConsumer[cteId= ( CTEId#6=] )
-----------------------PhysicalDistribute
+--------------------filter((t_w_firstyear.dyear = 1999)(t_w_firstyear.sale_type = 'w')(t_w_firstyear.year_total > 0.000000))
+----------------------CteConsumer[cteId= ( CTEId#6=] )
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)(CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END > CASE WHEN (year_total > 0.000000) THEN (cast(year_total as DECIMALV3(38, 16)) / year_total) ELSE NULL END)
+------------------------PhysicalDistribute
+--------------------------PhysicalProject
+----------------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.dyear = 2000))
+------------------------------CteConsumer[cteId= ( CTEId#6=] )
 ------------------------PhysicalProject
 --------------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_secyear.customer_id)
-----------------------------PhysicalProject
-------------------------------filter((t_c_secyear.sale_type = 'c')(t_c_secyear.dyear = 2000))
---------------------------------CteConsumer[cteId= ( CTEId#6=] )
 ----------------------------PhysicalDistribute
-------------------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_firstyear.customer_id)
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((t_s_firstyear.year_total > 0.000000)(t_s_firstyear.dyear = 1999)(t_s_firstyear.sale_type = 's'))
---------------------------------------CteConsumer[cteId= ( CTEId#6=] )
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((t_c_firstyear.year_total > 0.000000)(t_c_firstyear.dyear = 1999)(t_c_firstyear.sale_type = 'c'))
---------------------------------------CteConsumer[cteId= ( CTEId#6=] )
+------------------------------PhysicalProject
+--------------------------------filter((t_c_secyear.sale_type = 'c')(t_c_secyear.dyear = 2000))
+----------------------------------CteConsumer[cteId= ( CTEId#6=] )
+----------------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_c_firstyear.customer_id)
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((t_s_firstyear.year_total > 0.000000)(t_s_firstyear.dyear = 1999)(t_s_firstyear.sale_type = 's'))
+------------------------------------CteConsumer[cteId= ( CTEId#6=] )
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((t_c_firstyear.year_total > 0.000000)(t_c_firstyear.dyear = 1999)(t_c_firstyear.sale_type = 'c'))
+------------------------------------CteConsumer[cteId= ( CTEId#6=] )
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out
index ad7a504ed2..5d2dff0b76 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out
@@ -35,15 +35,16 @@ CteAnchor[cteId= ( CTEId#0=] )
 ------PhysicalDistribute
 --------PhysicalTopN
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN](s_store_name = v1_lag.s_store_name)(v1.i_category = v1_lag.i_category)(v1.i_brand = v1_lag.i_brand)(v1.s_company_name = v1_lag.s_company_name)(v1.rn = expr_(rn + 1))
+------------hashJoin[INNER_JOIN](s_store_name = v1_lead.s_store_name)(v1.i_category = v1_lead.i_category)(v1.i_brand = v1_lead.i_brand)(v1.s_company_name = v1_lead.s_company_name)(v1.rn = expr_(rn - 1))
 --------------PhysicalProject
 ----------------CteConsumer[cteId= ( CTEId#0=] )
 --------------PhysicalDistribute
-----------------hashJoin[INNER_JOIN](s_store_name = v1_lead.s_store_name)(v1.i_category = v1_lead.i_category)(v1.i_brand = v1_lead.i_brand)(v1.s_company_name = v1_lead.s_company_name)(v1.rn = expr_(rn - 1))
-------------------PhysicalProject
---------------------CteConsumer[cteId= ( CTEId#0=] )
-------------------PhysicalDistribute
+----------------PhysicalProject
+------------------hashJoin[INNER_JOIN](s_store_name = v1_lag.s_store_name)(v1.i_category = v1_lag.i_category)(v1.i_brand = v1_lag.i_brand)(v1.s_company_name = v1_lag.s_company_name)(v1.rn = expr_(rn + 1))
 --------------------PhysicalProject
-----------------------filter((CASE WHEN (avg_monthly_sales > 0.0000) THEN (abs((cast(sum_sales as DOUBLE) - cast(avg_monthly_sales as DOUBLE))) / cast(avg_monthly_sales as DOUBLE)) ELSE NULL END > 0.1)(v2.d_year = 2001)(v2.avg_monthly_sales > 0.0000))
-------------------------CteConsumer[cteId= ( CTEId#0=] )
+----------------------CteConsumer[cteId= ( CTEId#0=] )
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------filter((CASE WHEN (avg_monthly_sales > 0.0000) THEN (abs((cast(sum_sales as DOUBLE) - cast(avg_monthly_sales as DOUBLE))) / cast(avg_monthly_sales as DOUBLE)) ELSE NULL END > 0.1)(v2.d_year = 2001)(v2.avg_monthly_sales > 0.0000))
+--------------------------CteConsumer[cteId= ( CTEId#0=] )
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query51.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query51.out
index a749fae1b2..50af4041a8 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query51.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query51.out
@@ -18,12 +18,12 @@ PhysicalTopN
 ------------------------------PhysicalDistribute
 --------------------------------hashAgg[LOCAL]
 ----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
+------------------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
 --------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[web_sales]
+----------------------------------------PhysicalOlapScan[store_sales]
 --------------------------------------PhysicalDistribute
 ----------------------------------------PhysicalProject
-------------------------------------------filter((date_dim.d_month_seq >= 1216)(date_dim.d_month_seq <= 1227))
+------------------------------------------filter((date_dim.d_month_seq <= 1227)(date_dim.d_month_seq >= 1216))
 --------------------------------------------PhysicalOlapScan[date_dim]
 ------------------PhysicalDistribute
 --------------------PhysicalProject
@@ -34,11 +34,11 @@ PhysicalTopN
 ------------------------------PhysicalDistribute
 --------------------------------hashAgg[LOCAL]
 ----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
+------------------------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
 --------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[store_sales]
+----------------------------------------PhysicalOlapScan[web_sales]
 --------------------------------------PhysicalDistribute
 ----------------------------------------PhysicalProject
-------------------------------------------filter((date_dim.d_month_seq <= 1227)(date_dim.d_month_seq >= 1216))
+------------------------------------------filter((date_dim.d_month_seq >= 1216)(date_dim.d_month_seq <= 1227))
 --------------------------------------------PhysicalOlapScan[date_dim]
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out
index 0ac303c1e8..983aa5cd7e 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out
@@ -35,15 +35,16 @@ CteAnchor[cteId= ( CTEId#0=] )
 ------PhysicalDistribute
 --------PhysicalTopN
 ----------PhysicalProject
-------------hashJoin[INNER_JOIN](i_brand = v1_lag.i_brand)(v1.i_category = v1_lag.i_category)(v1.cc_name = v1_lag.cc_name)(v1.rn = expr_(rn + 1))
+------------hashJoin[INNER_JOIN](i_brand = v1_lead.i_brand)(v1.i_category = v1_lead.i_category)(v1.cc_name = v1_lead.cc_name)(v1.rn = expr_(rn - 1))
 --------------PhysicalProject
 ----------------CteConsumer[cteId= ( CTEId#0=] )
 --------------PhysicalDistribute
-----------------hashJoin[INNER_JOIN](i_brand = v1_lead.i_brand)(v1.i_category = v1_lead.i_category)(v1.cc_name = v1_lead.cc_name)(v1.rn = expr_(rn - 1))
-------------------PhysicalProject
---------------------CteConsumer[cteId= ( CTEId#0=] )
-------------------PhysicalDistribute
+----------------PhysicalProject
+------------------hashJoin[INNER_JOIN](i_brand = v1_lag.i_brand)(v1.i_category = v1_lag.i_category)(v1.cc_name = v1_lag.cc_name)(v1.rn = expr_(rn + 1))
 --------------------PhysicalProject
-----------------------filter((CASE WHEN (avg_monthly_sales > 0.0000) THEN (abs((cast(sum_sales as DOUBLE) - cast(avg_monthly_sales as DOUBLE))) / cast(avg_monthly_sales as DOUBLE)) ELSE NULL END > 0.1)(v2.d_year = 1999)(v2.avg_monthly_sales > 0.0000))
-------------------------CteConsumer[cteId= ( CTEId#0=] )
+----------------------CteConsumer[cteId= ( CTEId#0=] )
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------filter((CASE WHEN (avg_monthly_sales > 0.0000) THEN (abs((cast(sum_sales as DOUBLE) - cast(avg_monthly_sales as DOUBLE))) / cast(avg_monthly_sales as DOUBLE)) ELSE NULL END > 0.1)(v2.d_year = 1999)(v2.avg_monthly_sales > 0.0000))
+--------------------------CteConsumer[cteId= ( CTEId#0=] )
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out
index f28f35e56e..64a95334d5 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out
@@ -5,8 +5,6 @@ PhysicalTopN
 ----PhysicalTopN
 ------PhysicalProject
 --------hashJoin[INNER_JOIN](item.i_item_sk = sc.ss_item_sk)
-----------PhysicalProject
-------------PhysicalOlapScan[item]
 ----------PhysicalDistribute
 ------------PhysicalProject
 --------------hashJoin[INNER_JOIN](store.s_store_sk = sc.ss_store_sk)
@@ -41,4 +39,6 @@ PhysicalTopN
 ----------------PhysicalDistribute
 ------------------PhysicalProject
 --------------------PhysicalOlapScan[store]
+----------PhysicalProject
+------------PhysicalOlapScan[item]
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out
index 5c704a265b..c1f925d45b 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query74.out
@@ -44,24 +44,23 @@ CteAnchor[cteId= ( CTEId#4=] )
 ----PhysicalDistribute
 ------PhysicalTopN
 --------PhysicalProject
-----------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)(CASE WHEN (year_total > 0.0) THEN (year_total / year_total) ELSE NULL END > CASE WHEN (year_total > 0.0) THEN (year_total / year_total) ELSE NULL END)
-------------PhysicalProject
---------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.year = 2000))
-----------------CteConsumer[cteId= ( CTEId#4=] )
+----------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)(CASE WHEN (year_total > 0.0) THEN (year_total / year_total) ELSE NULL END > CASE WHEN (year_total > 0.0) THEN (year_total / year_total) ELSE NULL END)
 ------------PhysicalDistribute
 --------------PhysicalProject
-----------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_firstyear.customer_id)
-------------------PhysicalDistribute
---------------------PhysicalProject
-----------------------filter((t_w_firstyear.year = 1999)(t_w_firstyear.year_total > 0.0)(t_w_firstyear.sale_type = 'w'))
-------------------------CteConsumer[cteId= ( CTEId#4=] )
-------------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)
---------------------PhysicalDistribute
-----------------------PhysicalProject
-------------------------filter((t_s_firstyear.year = 1999)(t_s_firstyear.sale_type = 's')(t_s_firstyear.year_total > 0.0))
---------------------------CteConsumer[cteId= ( CTEId#4=] )
---------------------PhysicalDistribute
-----------------------PhysicalProject
-------------------------filter((t_w_secyear.year = 2000)(t_w_secyear.sale_type = 'w'))
---------------------------CteConsumer[cteId= ( CTEId#4=] )
+----------------filter((t_w_firstyear.year = 1999)(t_w_firstyear.year_total > 0.0)(t_w_firstyear.sale_type = 'w'))
+------------------CteConsumer[cteId= ( CTEId#4=] )
+------------hashJoin[INNER_JOIN](t_s_firstyear.customer_id = t_w_secyear.customer_id)
+--------------PhysicalDistribute
+----------------PhysicalProject
+------------------filter((t_w_secyear.year = 2000)(t_w_secyear.sale_type = 'w'))
+--------------------CteConsumer[cteId= ( CTEId#4=] )
+--------------hashJoin[INNER_JOIN](t_s_secyear.customer_id = t_s_firstyear.customer_id)
+----------------PhysicalDistribute
+------------------PhysicalProject
+--------------------filter((t_s_firstyear.year = 1999)(t_s_firstyear.sale_type = 's')(t_s_firstyear.year_total > 0.0))
+----------------------CteConsumer[cteId= ( CTEId#4=] )
+----------------PhysicalDistribute
+------------------PhysicalProject
+--------------------filter((t_s_secyear.sale_type = 's')(t_s_secyear.year = 2000))
+----------------------CteConsumer[cteId= ( CTEId#4=] )
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org