You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by en...@apache.org on 2023/06/29 08:37:12 UTC

[doris] branch master updated: [fix](nereids)update Agg stats estimation #21300

This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 64e9eab0dd [fix](nereids)update Agg stats estimation #21300
64e9eab0dd is described below

commit 64e9eab0dd05231b6d7f46fb441e281e10a69a64
Author: minghong <en...@gmail.com>
AuthorDate: Thu Jun 29 16:37:05 2023 +0800

    [fix](nereids)update Agg stats estimation #21300
    
    Agg stats estimation should use the biggest groupby key's NDV as base, and multiply expansion factor, which is calculated by other groupby key' ndv.
    Before, we use the smallest ndv as base
---
 .../doris/nereids/stats/StatsCalculator.java       |  3 +-
 .../nereids_tpcds_shape_sf100_p0/shape/query24.out | 23 +++++------
 .../nereids_tpcds_shape_sf100_p0/shape/query31.out | 46 +++++++++++-----------
 3 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index acf24961df..385c0227ea 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -127,6 +127,7 @@ import org.apache.logging.log4j.Logger;
 
 import java.util.AbstractMap.SimpleEntry;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -689,7 +690,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
             if (groupByCount > 0) {
                 List<Double> groupByNdvs = groupByColStats.values().stream()
                         .map(colStats -> colStats.ndv)
-                        .sorted().collect(Collectors.toList());
+                        .sorted(Comparator.reverseOrder()).collect(Collectors.toList());
                 rowCount = groupByNdvs.get(0);
                 for (int groupByIndex = 1; groupByIndex < groupByCount; ++groupByIndex) {
                     rowCount *= Math.max(1, groupByNdvs.get(groupByIndex) * Math.pow(
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out
index 70f424e95c..c887e96371 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out
@@ -32,21 +32,22 @@ CteAnchor[cteId= ( CTEId#0=] )
 ----------------PhysicalProject
 ------------------PhysicalOlapScan[store_returns]
 --PhysicalQuickSort
-----PhysicalQuickSort
-------PhysicalProject
---------NestedLoopJoin[INNER_JOIN](cast(paid as DOUBLE) > cast((0.05 * avg(netpaid)) as DOUBLE))
-----------PhysicalAssertNumRows
-------------PhysicalProject
---------------hashAgg[GLOBAL]
-----------------PhysicalDistribute
-------------------hashAgg[LOCAL]
---------------------PhysicalProject
-----------------------CteConsumer[cteId= ( CTEId#0=] )
-----------PhysicalDistribute
+----PhysicalDistribute
+------PhysicalQuickSort
+--------PhysicalProject
+----------NestedLoopJoin[INNER_JOIN](cast(paid as DOUBLE) > cast((0.05 * avg(netpaid)) as DOUBLE))
 ------------hashAgg[GLOBAL]
 --------------PhysicalDistribute
 ----------------hashAgg[LOCAL]
 ------------------PhysicalProject
 --------------------filter((cast(i_color as VARCHAR(*)) = 'beige'))
 ----------------------CteConsumer[cteId= ( CTEId#0=] )
+------------PhysicalDistribute
+--------------PhysicalAssertNumRows
+----------------PhysicalProject
+------------------hashAgg[GLOBAL]
+--------------------PhysicalDistribute
+----------------------hashAgg[LOCAL]
+------------------------PhysicalProject
+--------------------------CteConsumer[cteId= ( CTEId#0=] )
 
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query31.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query31.out
index e835f410d8..12a7b7db31 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query31.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query31.out
@@ -44,33 +44,33 @@ CteAnchor[cteId= ( CTEId#6=] )
 --------PhysicalQuickSort
 ----------PhysicalProject
 ------------hashJoin[INNER_JOIN](ws1.ca_county = ws3.ca_county)(CASE WHEN (web_sales > 0.00) THEN (cast(web_sales as DECIMALV3(38, 8)) / web_sales) ELSE NULL END > CASE WHEN (store_sales > 0.00) THEN (cast(store_sales as DECIMALV3(38, 8)) / store_sales) ELSE NULL END)
---------------PhysicalProject
-----------------filter((ws3.d_year = 2000)(ws3.d_qoy = 3))
-------------------CteConsumer[cteId= ( CTEId#7=] )
 --------------PhysicalDistribute
 ----------------PhysicalProject
-------------------hashJoin[INNER_JOIN](ss2.ca_county = ss3.ca_county)
---------------------PhysicalDistribute
-----------------------PhysicalProject
-------------------------filter((ss3.d_year = 2000)(ss3.d_qoy = 3))
---------------------------CteConsumer[cteId= ( CTEId#6=] )
---------------------hashJoin[INNER_JOIN](ss1.ca_county = ss2.ca_county)(CASE WHEN (web_sales > 0.00) THEN (cast(web_sales as DECIMALV3(38, 8)) / web_sales) ELSE NULL END > CASE WHEN (store_sales > 0.00) THEN (cast(store_sales as DECIMALV3(38, 8)) / store_sales) ELSE NULL END)
-----------------------PhysicalDistribute
-------------------------PhysicalProject
---------------------------filter((ss2.d_year = 2000)(ss2.d_qoy = 2))
-----------------------------CteConsumer[cteId= ( CTEId#6=] )
-----------------------hashJoin[INNER_JOIN](ss1.ca_county = ws1.ca_county)
+------------------filter((ws3.d_year = 2000)(ws3.d_qoy = 3))
+--------------------CteConsumer[cteId= ( CTEId#7=] )
+--------------PhysicalProject
+----------------hashJoin[INNER_JOIN](ss2.ca_county = ss3.ca_county)
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------filter((ss3.d_year = 2000)(ss3.d_qoy = 3))
+------------------------CteConsumer[cteId= ( CTEId#6=] )
+------------------hashJoin[INNER_JOIN](ws1.ca_county = ws2.ca_county)(CASE WHEN (web_sales > 0.00) THEN (cast(web_sales as DECIMALV3(38, 8)) / web_sales) ELSE NULL END > CASE WHEN (store_sales > 0.00) THEN (cast(store_sales as DECIMALV3(38, 8)) / store_sales) ELSE NULL END)
+--------------------hashJoin[INNER_JOIN](ss1.ca_county = ws1.ca_county)
+----------------------hashJoin[INNER_JOIN](ss1.ca_county = ss2.ca_county)
 ------------------------PhysicalDistribute
 --------------------------PhysicalProject
 ----------------------------filter((ss1.d_year = 2000)(ss1.d_qoy = 1))
 ------------------------------CteConsumer[cteId= ( CTEId#6=] )
-------------------------hashJoin[INNER_JOIN](ws1.ca_county = ws2.ca_county)
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------filter((ws1.d_year = 2000)(ws1.d_qoy = 1))
---------------------------------CteConsumer[cteId= ( CTEId#7=] )
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------filter((ws2.d_qoy = 2)(ws2.d_year = 2000))
---------------------------------CteConsumer[cteId= ( CTEId#7=] )
+------------------------PhysicalDistribute
+--------------------------PhysicalProject
+----------------------------filter((ss2.d_year = 2000)(ss2.d_qoy = 2))
+------------------------------CteConsumer[cteId= ( CTEId#6=] )
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------filter((ws1.d_year = 2000)(ws1.d_qoy = 1))
+----------------------------CteConsumer[cteId= ( CTEId#7=] )
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------filter((ws2.d_qoy = 2)(ws2.d_year = 2000))
+--------------------------CteConsumer[cteId= ( CTEId#7=] )
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org