Posted to commits@doris.apache.org by yi...@apache.org on 2022/04/29 02:41:19 UTC

[incubator-doris] branch master updated: [feature](statistics) Statistics derivation.Step 1:ScanNode implement… (#8947)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9ef09b8354 [feature](statistics) Statistics derivation.Step 1:ScanNode implement… (#8947)
9ef09b8354 is described below

commit 9ef09b8354cbd499512efb4df4a5e9c8eba1b1af
Author: zhengshiJ <32...@users.noreply.github.com>
AuthorDate: Fri Apr 29 10:41:12 2022 +0800

    [feature](statistics) Statistics derivation.Step 1:ScanNode implement… (#8947)
    
    * [feature](statistics) Statistics derivation.Step 1:ScanNode implementation
    
    Co-authored-by: jianghaochen <ji...@meituan.com>
---
 .../org/apache/doris/planner/BrokerScanNode.java   |   9 +-
 .../java/org/apache/doris/planner/EsScanNode.java  |   2 +-
 .../org/apache/doris/planner/HiveScanNode.java     |   2 +-
 .../org/apache/doris/planner/IcebergScanNode.java  |   2 +-
 .../org/apache/doris/planner/LoadScanNode.java     |   6 +-
 .../org/apache/doris/planner/MysqlScanNode.java    |   2 +-
 .../org/apache/doris/planner/OdbcScanNode.java     |   2 +-
 .../org/apache/doris/planner/OlapScanNode.java     |  53 +++----
 .../java/org/apache/doris/planner/PlanNode.java    |  33 +++++
 .../java/org/apache/doris/planner/ScanNode.java    |   3 +-
 .../org/apache/doris/planner/SchemaScanNode.java   |   2 +-
 .../apache/doris/planner/SingleNodePlanner.java    |   1 -
 .../java/org/apache/doris/qe/StmtExecutor.java     |   8 -
 .../apache/doris/statistics/BaseStatsDerive.java   | 161 +++++++++++++++++++++
 .../org/apache/doris/statistics/DeriveFactory.java |  36 +++++
 .../doris/statistics/OlapScanStatsDerive.java      | 112 ++++++++++++++
 .../org/apache/doris/statistics/Statistics.java    |  13 ++
 .../apache/doris/statistics/StatisticsManager.java |   4 +
 .../apache/doris/statistics/StatsDeriveResult.java |  56 +++++++
 .../doris/statistics/StatsRecursiveDerive.java     |  56 +++++++
 .../org/apache/doris/statistics/TableStats.java    |  12 ++
 .../org/apache/doris/analysis/ExplainTest.java     |  13 +-
 .../org/apache/doris/planner/QueryPlanTest.java    |   7 +-
 23 files changed, 545 insertions(+), 50 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java
index 7338d9bbb0..73aa1fb04d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java
@@ -138,7 +138,14 @@ public class BrokerScanNode extends LoadScanNode {
 
     public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
                           List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, destTupleDesc, planNodeName);
+        super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE);
+        this.fileStatusesList = fileStatusesList;
+        this.filesAdded = filesAdded;
+    }
+
+    public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
+                          List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded, NodeType nodeType) {
+        super(id, destTupleDesc, planNodeName, nodeType);
         this.fileStatusesList = fileStatusesList;
         this.filesAdded = filesAdded;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
index 2ed1a4bde8..96dbdbe934 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
@@ -72,7 +72,7 @@ public class EsScanNode extends ScanNode {
     boolean isFinalized = false;
 
     public EsScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+        super(id, desc, planNodeName, NodeType.ES_SCAN_NODE);
         table = (EsTable) (desc.getTable());
         esTablePartitions = table.getEsTablePartitions();
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java
index 76cf534313..c18533d302 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java
@@ -100,7 +100,7 @@ public class HiveScanNode extends BrokerScanNode {
 
     public HiveScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
                         List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded);
+        super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded, NodeType.HIVE_SCAN_NODE);
         this.hiveTable = (HiveTable) destTupleDesc.getTable();
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java
index 5428c9ee55..5c7dd1fad0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java
@@ -47,7 +47,7 @@ public class IcebergScanNode extends BrokerScanNode {
 
     public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName,
                            List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, desc, planNodeName, fileStatusesList, filesAdded);
+        super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBREG_SCAN_NODE);
         icebergTable = (IcebergTable) desc.getTable();
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java
index b6dbb4f782..82299e2ea6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java
@@ -54,7 +54,11 @@ public abstract class LoadScanNode extends ScanNode {
     protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND;
 
     public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+        super(id, desc, planNodeName, NodeType.LOAD_SCAN_NODE);
+    }
+
+    public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
+        super(id, desc, planNodeName, nodeType);
     }
 
     protected void initAndSetWhereExpr(Expr whereExpr, TupleDescriptor tupleDesc, Analyzer analyzer) throws UserException {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java
index fcc22125c9..9235b1fc75 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java
@@ -56,7 +56,7 @@ public class MysqlScanNode extends ScanNode {
      * Constructs node to scan given data files of table 'tbl'.
      */
     public MysqlScanNode(PlanNodeId id, TupleDescriptor desc, MysqlTable tbl) {
-        super(id, desc, "SCAN MYSQL");
+        super(id, desc, "SCAN MYSQL", NodeType.MYSQL_SCAN_NODE);
         tblName = "`" + tbl.getMysqlTableName() + "`";
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java
index 90f989d35c..1f32b9e938 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java
@@ -73,7 +73,7 @@ public class OdbcScanNode extends ScanNode {
      * Constructs node to scan given data files of table 'tbl'.
      */
     public OdbcScanNode(PlanNodeId id, TupleDescriptor desc, OdbcTable tbl) {
-        super(id, desc, "SCAN ODBC");
+        super(id, desc, "SCAN ODBC", NodeType.ODBC_SCAN_NODE);
         connectString = tbl.getConnectString();
         odbcType = tbl.getOdbcTableType();
         tblName = OdbcTable.databaseProperName(odbcType, tbl.getOdbcTableName());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index f74666a18c..6a1fc49b03 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -53,6 +53,7 @@ import org.apache.doris.common.util.Util;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.SessionVariable;
 import org.apache.doris.resource.Tag;
+import org.apache.doris.statistics.StatsRecursiveDerive;
 import org.apache.doris.system.Backend;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.TNetworkAddress;
@@ -146,7 +147,7 @@ public class OlapScanNode extends ScanNode {
 
     // Constructs node to scan given data files of table 'tbl'.
     public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+        super(id, desc, planNodeName, NodeType.OLAP_SCAN_NODE);
         olapTable = (OlapTable) desc.getTable();
     }
 
@@ -346,10 +347,25 @@ public class OlapScanNode extends ScanNode {
          * - So only an inaccurate cardinality can be calculated here.
          */
         if (analyzer.safeIsEnableJoinReorderBasedCost()) {
+            mockRowCountInStatistic();
             computeInaccurateCardinality();
         }
     }
 
+    /**
+     * Remove the method after statistics collection is working properly
+     */
+    public void mockRowCountInStatistic() {
+        long tableId = desc.getTable().getId();
+        cardinality = 0;
+        for (long selectedPartitionId : selectedPartitionIds) {
+            final Partition partition = olapTable.getPartition(selectedPartitionId);
+            final MaterializedIndex baseIndex = partition.getBaseIndex();
+            cardinality += baseIndex.getRowCount();
+        }
+        Catalog.getCurrentCatalog().getStatisticsManager().getStatistics().mockTableStatsWithRowCount(tableId, cardinality);
+    }
+
     @Override
     public void finalize(Analyzer analyzer) throws UserException {
         LOG.debug("OlapScanNode get scan range locations. Tuple: {}", desc);
@@ -386,6 +402,12 @@ public class OlapScanNode extends ScanNode {
         }
         // when node scan has no data, cardinality should be 0 instead of a invalid value after computeStats()
         cardinality = cardinality == -1 ? 0 : cardinality;
+
+        // update statsDeriveResult for real statistics
+        // After statistics collection is complete, remove the logic
+        if (analyzer.safeIsEnableJoinReorderBasedCost()) {
+            statsDeriveResult.setRowCount(cardinality);
+        }
     }
 
     @Override
@@ -397,30 +419,9 @@ public class OlapScanNode extends ScanNode {
         numNodes = numNodes <= 0 ? 1 : numNodes;
     }
 
-    /**
-     * Calculate inaccurate cardinality.
-     * cardinality: the value of cardinality is the sum of rowcount which belongs to selectedPartitionIds
-     * The cardinality here is actually inaccurate, it will be greater than the actual value.
-     * There are two reasons
-     * 1. During the actual execution, not all tablets belonging to the selected partition will be scanned.
-     * Some tablets may have been pruned before execution.
-     * 2. The base index may eventually be replaced by mv index.
-     * <p>
-     * There are three steps to calculate cardinality
-     * 1. Calculate how many rows were scanned
-     * 2. Apply conjunct
-     * 3. Apply limit
-     */
-    private void computeInaccurateCardinality() {
-        // step1: Calculate how many rows were scanned
-        cardinality = 0;
-        for (long selectedPartitionId : selectedPartitionIds) {
-            final Partition partition = olapTable.getPartition(selectedPartitionId);
-            final MaterializedIndex baseIndex = partition.getBaseIndex();
-            cardinality += baseIndex.getRowCount();
-        }
-        applyConjunctsSelectivity();
-        capCardinalityAtLimit();
+    private void computeInaccurateCardinality() throws UserException {
+        StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
+        cardinality = statsDeriveResult.getRowCount();
     }
 
     private Collection<Long> partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException {
@@ -563,7 +564,7 @@ public class OlapScanNode extends ScanNode {
 
             result.add(scanRangeLocations);
         }
-        // FIXME(dhc): we use cardinality here to simulate ndv
+
         if (tablets.size() == 0) {
             desc.setCardinality(0);
         } else {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
index 58a0019962..13ac58c40f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
@@ -36,6 +36,7 @@ import org.apache.doris.common.NotImplementedException;
 import org.apache.doris.common.TreeNode;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.VectorizedUtil;
+import org.apache.doris.statistics.StatsDeriveResult;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.TFunctionBinaryType;
 import org.apache.doris.thrift.TPlan;
@@ -135,6 +136,9 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
 
     protected List<SlotId> outputSlotIds;
 
+    protected NodeType nodeType = NodeType.DEFAULT;
+    protected StatsDeriveResult statsDeriveResult;
+
     protected PlanNode(PlanNodeId id, ArrayList<TupleId> tupleIds, String planNodeName) {
         this.id = id;
         this.limit = -1;
@@ -173,12 +177,41 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
         this.planNodeName = VectorizedUtil.isVectorized() ?
                 "V" + planNodeName : planNodeName;
         this.numInstances = 1;
+        this.nodeType = nodeType;
+    }
+
+    public enum NodeType {
+        DEFAULT,
+        AGG_NODE,
+        BROKER_SCAN_NODE,
+        HASH_JOIN_NODE,
+        HIVE_SCAN_NODE,
+        MERGE_NODE,
+        ES_SCAN_NODE,
+        ICEBREG_SCAN_NODE,
+        LOAD_SCAN_NODE,
+        MYSQL_SCAN_NODE,
+        ODBC_SCAN_NODE,
+        OLAP_SCAN_NODE,
+        SCHEMA_SCAN_NODE,
     }
 
     public String getPlanNodeName() {
         return planNodeName;
     }
 
+    public StatsDeriveResult getStatsDeriveResult() {
+        return statsDeriveResult;
+    }
+
+    public NodeType getNodeType() {
+        return nodeType;
+    }
+
+    public void setStatsDeriveResult(StatsDeriveResult statsDeriveResult) {
+        this.statsDeriveResult = statsDeriveResult;
+    }
+
     /**
      * Sets tblRefIds_, tupleIds_, and nullableTupleIds_.
      * The default implementation is a no-op.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
index 8b8c52b5bc..a891f7616f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
@@ -65,8 +65,9 @@ abstract public class ScanNode extends PlanNode {
     protected String sortColumn = null;
     protected Analyzer analyzer;
 
-    public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
+    public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
         super(id, desc.getId().asList(), planNodeName);
+        super.nodeType = nodeType;
         this.desc = desc;
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java
index c5692c38e8..07152bfd5e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java
@@ -57,7 +57,7 @@ public class SchemaScanNode extends ScanNode {
      * Constructs node to scan given data files of table 'tbl'.
      */
     public SchemaScanNode(PlanNodeId id, TupleDescriptor desc) {
-        super(id, desc, "SCAN SCHEMA");
+        super(id, desc, "SCAN SCHEMA", NodeType.SCHEMA_SCAN_NODE);
         this.tableName = desc.getTable().getName();
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index 8ad921bf09..a78435a3dd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -1725,7 +1725,6 @@ public class SingleNodePlanner {
         scanNodeList.add(scanNode);
 
         scanNode.init(analyzer);
-
         return scanNode;
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index e913f9cc96..7195591a29 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -699,14 +699,6 @@ public class StmtExecutor implements ProfileWriter {
                 }
                 if (explainOptions != null) parsedStmt.setIsExplain(explainOptions);
             }
-
-            if (parsedStmt instanceof InsertStmt && parsedStmt.isExplain()) {
-                if (ConnectContext.get() != null &&
-                        ConnectContext.get().getExecutor() != null &&
-                        ConnectContext.get().getExecutor().getParsedStmt() != null) {
-                    ConnectContext.get().getExecutor().getParsedStmt().setIsExplain(new ExplainOptions(true, false));
-                }
-            }
         }
         plannerProfile.setQueryAnalysisFinishTime();
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java
new file mode 100644
index 0000000000..ccb58e9287
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java
@@ -0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.doris.analysis.Expr;
+import org.apache.doris.analysis.SlotId;
+import org.apache.doris.common.UserException;
+import org.apache.doris.planner.PlanNode;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+
+public class BaseStatsDerive {
+    private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class);
+    // estimate of the output rowCount of this node;
+    // invalid: -1
+    protected long rowCount = -1;
+    protected long limit = -1;
+
+    protected List<Expr> conjuncts = Lists.newArrayList();
+    protected List<StatsDeriveResult> childrenStatsResult = Lists.newArrayList();
+
+    protected void init(PlanNode node) throws UserException {
+        limit = node.getLimit();
+        conjuncts.addAll(node.getConjuncts());
+
+        for (PlanNode childNode : node.getChildren()) {
+            StatsDeriveResult result = childNode.getStatsDeriveResult();
+            if (result == null) {
+                throw new UserException("childNode statsDeriveResult is null, childNodeType is " + childNode.getNodeType()
+                + "parentNodeType is " + node.getNodeType());
+            }
+            childrenStatsResult.add(result);
+        }
+    }
+
+    public StatsDeriveResult deriveStats() {
+        return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
+    }
+
+    public boolean hasLimit() {
+        return limit > -1;
+    }
+
+    protected void applyConjunctsSelectivity() {
+        if (rowCount == -1) {
+            return;
+        }
+        applySelectivity();
+    }
+
+    private void applySelectivity() {
+        double selectivity = computeSelectivity();
+        Preconditions.checkState(rowCount >= 0);
+        long preConjunctrowCount = rowCount;
+        rowCount = Math.round(rowCount * selectivity);
+        // don't round rowCount down to zero for safety.
+        if (rowCount == 0 && preConjunctrowCount > 0) {
+            rowCount = 1;
+        }
+    }
+
+    protected double computeSelectivity() {
+        for (Expr expr : conjuncts) {
+            expr.setSelectivity();
+        }
+        return computeCombinedSelectivity(conjuncts);
+    }
+
+    /**
+     * Returns the estimated combined selectivity of all conjuncts. Uses heuristics to
+     * address the following estimation challenges:
+     * 1. The individual selectivities of conjuncts may be unknown.
+     * 2. Two selectivities, whether known or unknown, could be correlated. Assuming
+     * independence can lead to significant underestimation.
+     * <p>
+     * The first issue is addressed by using a single default selectivity that is
+     * representative of all conjuncts with unknown selectivities.
+     * The second issue is addressed by an exponential backoff when multiplying each
+     * additional selectivity into the final result.
+     */
+    protected double computeCombinedSelectivity(List<Expr> conjuncts) {
+        // Collect all estimated selectivities.
+        List<Double> selectivities = new ArrayList<>();
+        for (Expr e : conjuncts) {
+            if (e.hasSelectivity()) selectivities.add(e.getSelectivity());
+        }
+        if (selectivities.size() != conjuncts.size()) {
+            // Some conjuncts have no estimated selectivity. Use a single default
+            // representative selectivity for all those conjuncts.
+            selectivities.add(Expr.DEFAULT_SELECTIVITY);
+        }
+        // Sort the selectivities to get a consistent estimate, regardless of the original
+        // conjunct order. Sort in ascending order such that the most selective conjunct
+        // is fully applied.
+        Collections.sort(selectivities);
+        double result = 1.0;
+        // selectivity = 1 * (s1)^(1/1) * (s2)^(1/2) * ... * (sn-1)^(1/(n-1)) * (sn)^(1/n)
+        for (int i = 0; i < selectivities.size(); ++i) {
+            // Exponential backoff for each selectivity multiplied into the final result.
+            result *= Math.pow(selectivities.get(i), 1.0 / (double) (i + 1));
+        }
+        // Bound result in [0, 1]
+        return Math.max(0.0, Math.min(1.0, result));
+    }
+
+    protected void capRowCountAtLimit() {
+        if (hasLimit()) {
+            rowCount = rowCount == -1 ? limit : Math.min(rowCount, limit);
+        }
+    }
+
+
+    // Currently it simply adds the number of rows of children
+    protected long deriveRowCount() {
+        for (StatsDeriveResult statsDeriveResult : childrenStatsResult) {
+            rowCount = Math.max(rowCount, statsDeriveResult.getRowCount());
+        }
+        applyConjunctsSelectivity();
+        capRowCountAtLimit();
+        return rowCount;
+    }
+
+
+    protected HashMap<SlotId, Float> deriveColumnToDataSize() {
+        HashMap<SlotId, Float> columnToDataSize = new HashMap<>();
+        for (StatsDeriveResult child : childrenStatsResult) {
+            columnToDataSize.putAll(child.getColumnToDataSize());
+        }
+        return columnToDataSize;
+    }
+
+    protected HashMap<SlotId, Long> deriveColumnToNdv() {
+        HashMap<SlotId, Long> columnToNdv = new HashMap<>();
+        for (StatsDeriveResult child : childrenStatsResult) {
+            columnToNdv.putAll(child.getColumnToNdv());
+        }
+        return columnToNdv;
+    }
+}
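
The exponential-backoff heuristic in computeCombinedSelectivity above can be illustrated with a small standalone example. This is a minimal sketch that reimplements only the combination formula (selectivity = s1^(1/1) * s2^(1/2) * ... * sn^(1/n)); the class name and sample values are illustrative and not part of the patch:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class SelectivityBackoffDemo {
        // Combine per-conjunct selectivities with exponential backoff,
        // mirroring the formula in BaseStatsDerive.computeCombinedSelectivity.
        static double combine(List<Double> selectivities) {
            List<Double> sorted = new ArrayList<>(selectivities);
            Collections.sort(sorted); // ascending: the most selective conjunct is applied in full
            double result = 1.0;
            for (int i = 0; i < sorted.size(); i++) {
                // result = s1^(1/1) * s2^(1/2) * ... * sn^(1/n)
                result *= Math.pow(sorted.get(i), 1.0 / (i + 1));
            }
            return Math.max(0.0, Math.min(1.0, result));
        }

        public static void main(String[] args) {
            // Two conjuncts with selectivities 0.1 and 0.5: assuming independence gives 0.05,
            // while the backoff gives 0.1 * 0.5^(1/2) ~= 0.071, which underestimates less
            // when the conjuncts are correlated.
            System.out.println(combine(Arrays.asList(0.5, 0.1)));
        }
    }
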
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java
new file mode 100644
index 0000000000..d663bf5e08
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.planner.PlanNode;
+
+public class DeriveFactory {
+
+    public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) {
+        switch (nodeType) {
+            case AGG_NODE:
+            case HASH_JOIN_NODE:
+            case MERGE_NODE:
+                break;
+            case OLAP_SCAN_NODE:
+                return new OlapScanStatsDerive();
+            case DEFAULT:
+        }
+        return new BaseStatsDerive();
+    }
+}
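
DeriveFactory keys the derivation strategy off PlanNode.NodeType and falls back to the generic BaseStatsDerive for node types that have no specialized implementation yet (only OLAP_SCAN_NODE gets one in this step). A self-contained toy sketch of the same dispatch, using stand-in types rather than the Doris classes:

    // Stand-in enum and classes for illustration only; not the Doris planner types.
    public class DeriveFactoryDemo {
        enum NodeType { DEFAULT, OLAP_SCAN_NODE, AGG_NODE, HASH_JOIN_NODE }

        static class BaseDerive {
            String describe() { return "generic derive"; }
        }

        static class OlapScanDerive extends BaseDerive {
            @Override
            String describe() { return "olap scan derive"; }
        }

        // Specialized derive for OLAP scans, generic fallback for everything else.
        static BaseDerive getStatsDerive(NodeType type) {
            switch (type) {
                case OLAP_SCAN_NODE:
                    return new OlapScanDerive();
                default:
                    return new BaseDerive();
            }
        }

        public static void main(String[] args) {
            System.out.println(getStatsDerive(NodeType.OLAP_SCAN_NODE).describe()); // olap scan derive
            System.out.println(getStatsDerive(NodeType.HASH_JOIN_NODE).describe()); // generic derive
        }
    }
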
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
new file mode 100644
index 0000000000..ff514aa55e
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import com.google.common.base.Preconditions;
+import org.apache.doris.analysis.SlotDescriptor;
+import org.apache.doris.analysis.SlotId;
+import org.apache.doris.catalog.Catalog;
+import org.apache.doris.common.Pair;
+import org.apache.doris.common.UserException;
+import org.apache.doris.planner.OlapScanNode;
+import org.apache.doris.planner.PlanNode;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class OlapScanStatsDerive extends BaseStatsDerive {
+    // Currently, due to the structure of doris,
+    // the selected materialized view is not determined when calculating the statistical information of scan,
+    // so baseIndex is used for calculation when generating Planner.
+
+    // The rowCount here is the number of rows.
+    private long inputRowCount = -1;
+    private Map<SlotId, Float> slotIdToDataSize;
+    private Map<SlotId, Long> slotIdToNdv;
+    private Map<SlotId, Pair<Long, String>> slotIdToTableIdAndColumnName;
+
+    @Override
+    public void init(PlanNode node) throws UserException {
+        Preconditions.checkState(node instanceof OlapScanNode);
+        super.init(node);
+        buildStructure((OlapScanNode)node);
+    }
+
+    @Override
+    public StatsDeriveResult deriveStats() {
+        /**
+         * Compute InAccurate cardinality before mv selector and tablet pruning.
+         * - Accurate statistical information relies on the selector of materialized views and bucket reduction.
+         * - However, Those both processes occur after the reorder algorithm is completed.
+         * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm.
+         * - So only an inaccurate cardinality can be calculated here.
+         */
+        rowCount = inputRowCount;
+        for (Map.Entry<SlotId, Pair<Long, String>> pairEntry : slotIdToTableIdAndColumnName.entrySet()) {
+            Pair<Long, Float> ndvAndDataSize = getNdvAndDataSizeFromStatistics(pairEntry.getValue());
+            long ndv = ndvAndDataSize.first;
+            float dataSize = ndvAndDataSize.second;
+            slotIdToNdv.put(pairEntry.getKey(), ndv);
+            slotIdToDataSize.put(pairEntry.getKey(), dataSize);
+        }
+        return new StatsDeriveResult(deriveRowCount(), slotIdToDataSize, slotIdToNdv);
+    }
+
+    public void buildStructure(OlapScanNode node) {
+        slotIdToDataSize = new HashMap<>();
+        slotIdToNdv = new HashMap<>();
+        if (node.getTupleDesc() != null
+            && node.getTupleDesc().getTable() != null) {
+            long tableId = node.getTupleDesc().getTable().getId();
+            inputRowCount = Catalog.getCurrentCatalog().getStatisticsManager()
+                    .getStatistics().getTableStats(tableId).getRowCount();
+        }
+        for (SlotDescriptor slot : node.getTupleDesc().getSlots()) {
+            if (!slot.isMaterialized()) {
+                continue;
+            }
+
+            long tableId = slot.getParent().getTable().getId();
+            String columnName = slot.getColumn().getName();
+            slotIdToTableIdAndColumnName.put(slot.getId(), new Pair<>(tableId, columnName));
+        }
+    }
+
+    //TODO:Implement the getStatistics interface
+    //now there is nothing in statistics, need to wait for collection finished
+    public Pair<Long, Float> getNdvAndDataSizeFromStatistics(Pair<Long, String> pair) {
+        long ndv = -1;
+        float dataSize = -1;
+        /*
+        if (Catalog.getCurrentCatalog()
+                    .getStatisticsManager()
+                    .getStatistics()
+                    .getColumnStats(pair.first) != null) {
+                ndv = Catalog.getCurrentCatalog()
+                        .getStatisticsManager()
+                        .getStatistics()
+                        .getColumnStats(pair.first).get(pair.second).getNdv();
+                dataSize = Catalog.getCurrentCatalog()
+                        .getStatisticsManager()
+                        .getStatistics()
+                        .getColumnStats(pair.first).get(pair.second).getDataSize();
+         }
+         */
+        return new Pair<>(ndv, dataSize);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index b17a37858d..58003de90c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -70,4 +70,17 @@ public class Statistics {
         }
         return tableStats.getNameToColumnStats();
     }
+
+    // TODO: mock statistics need to be removed in the future
+    public void mockTableStatsWithRowCount(long tableId, long rowCount) {
+        TableStats tableStats = idToTableStats.get(tableId);
+        if (tableStats == null) {
+            tableStats = new TableStats();
+            idToTableStats.put(tableId, tableStats);
+        }
+
+        if (tableStats.getRowCount() != rowCount) {
+            tableStats.setRowCount(rowCount);
+        }
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java
index e994cce212..b1c88e36a5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java
@@ -136,4 +136,8 @@ public class StatisticsManager {
         Table table = db.getTableOrAnalysisException(tableName);
         return table;
     }
+
+    public Statistics getStatistics() {
+        return statistics;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
new file mode 100644
index 0000000000..b9b0d8024e
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import com.google.common.collect.Maps;
+import org.apache.doris.analysis.SlotId;
+
+import java.util.Map;
+
+// This structure is maintained in each operator to store the statistical information results obtained by the operator.
+public class StatsDeriveResult {
+    private long rowCount = -1;
+    // The data size of the corresponding column in the operator
+    // The actual key is slotId
+    private final Map<SlotId, Float> columnToDataSize = Maps.newHashMap();
+    // The ndv of the corresponding column in the operator
+    // The actual key is slotId
+    private final Map<SlotId, Long> columnToNdv = Maps.newHashMap();
+
+    public StatsDeriveResult(long rowCount, Map<SlotId, Float> columnToDataSize, Map<SlotId, Long> columnToNdv) {
+        this.rowCount = rowCount;
+        this.columnToDataSize.putAll(columnToDataSize);
+        this.columnToNdv.putAll(columnToNdv);
+    }
+
+    public void setRowCount(long rowCount) {
+        this.rowCount = rowCount;
+    }
+
+    public long getRowCount() {
+        return rowCount;
+    }
+
+    public Map<SlotId, Long> getColumnToNdv() {
+        return columnToNdv;
+    }
+
+    public Map<SlotId, Float> getColumnToDataSize() {
+        return columnToDataSize;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java
new file mode 100644
index 0000000000..e6159a594d
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.common.UserException;
+import org.apache.doris.planner.PlanNode;
+
+
+public class StatsRecursiveDerive {
+    private StatsRecursiveDerive() {}
+
+    public static StatsRecursiveDerive getStatsRecursiveDerive() {
+        return Inner.INSTANCE;
+    }
+
+    private static class Inner {
+        private static final StatsRecursiveDerive INSTANCE = new StatsRecursiveDerive();
+    }
+
+    /**
+     * Recursively complete the derivation of statistics for this node and all its children
+     * @param node
+     * This parameter is an input and output parameter,
+     * which will store the derivation result of statistical information in the corresponding node
+     */
+    public void statsRecursiveDerive(PlanNode node) throws UserException {
+        if (node.getStatsDeriveResult() != null) {
+            return;
+        }
+        for (PlanNode childNode : node.getChildren()) {
+            if (childNode.getStatsDeriveResult() == null) {
+                statsRecursiveDerive(childNode);
+            }
+        }
+        DeriveFactory deriveFactory = new DeriveFactory();
+        BaseStatsDerive deriveStats = deriveFactory.getStatsDerive(node.getNodeType());
+        deriveStats.init(node);
+        StatsDeriveResult result = deriveStats.deriveStats();
+        node.setStatsDeriveResult(result);
+    }
+}
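
StatsRecursiveDerive walks the plan tree bottom-up: children are derived first, each result is cached on its node, and already-derived nodes are skipped. A self-contained toy version of that traversal, with a stand-in Node type instead of PlanNode (taking the max of the children's row counts mirrors BaseStatsDerive.deriveRowCount):

    import java.util.ArrayList;
    import java.util.List;

    public class RecursiveDeriveDemo {
        // Stand-in for PlanNode: a cached row count of -1 means "not derived yet".
        static class Node {
            final List<Node> children = new ArrayList<>();
            long derivedRowCount = -1;
            long ownRowCount;          // e.g. a scan node's input row count
            Node(long ownRowCount) { this.ownRowCount = ownRowCount; }
        }

        static void derive(Node node) {
            if (node.derivedRowCount != -1) {
                return;                // already derived, reuse the cached result
            }
            for (Node child : node.children) {
                derive(child);         // derive children first (post-order)
            }
            long rows = node.ownRowCount;
            for (Node child : node.children) {
                rows = Math.max(rows, child.derivedRowCount);
            }
            node.derivedRowCount = rows;
        }

        public static void main(String[] args) {
            Node scanA = new Node(1000);
            Node scanB = new Node(50);
            Node join = new Node(0);
            join.children.add(scanA);
            join.children.add(scanB);
            derive(join);
            System.out.println(join.derivedRowCount); // 1000
        }
    }
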
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
index efb35bbe10..ef494bd9f6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
@@ -94,4 +94,16 @@ public class TableStats {
     public Map<String, ColumnStats> getNameToColumnStats() {
         return nameToColumnStats;
     }
+
+    public long getRowCount() {
+        return rowCount;
+    }
+
+    public long getDataSize() {
+        return dataSize;
+    }
+
+    public void setRowCount(long rowCount) {
+        this.rowCount = rowCount;
+    }
 }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java
index 3fca5643a5..f94e69b142 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java
@@ -72,20 +72,27 @@ public class ExplainTest {
         Assert.assertEquals(dropDbStmt.toSql(), dropSchemaStmt.toSql());
     }
 
-    public void testExplainSelect() throws Exception {
-        String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
+    public void testExplainInsertInto() throws Exception {
+        String sql = "explain insert into test_explain.explain_t1 select * from test_explain.explain_t2";
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
         System.out.println(explainString);
         Assert.assertFalse(explainString.contains("CAST"));
     }
 
-    public void testExplainInsertInto() throws Exception {
+    public void testExplainVerboseInsertInto() throws Exception {
         String sql = "explain verbose insert into test_explain.explain_t1 select * from test_explain.explain_t2";
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true);
         System.out.println(explainString);
         Assert.assertTrue(explainString.contains("CAST"));
     }
 
+    public void testExplainSelect() throws Exception {
+        String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
+        String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
+        System.out.println(explainString);
+        Assert.assertFalse(explainString.contains("CAST"));
+    }
+
     public void testExplainVerboseSelect() throws Exception {
         String queryStr = "explain verbose select * from test_explain.explain_t1 where dt = '1001';";
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, queryStr, true);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
index ee09be1d8a..62b741b61b 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
@@ -1991,8 +1991,9 @@ public class QueryPlanTest {
     public void testExplainInsertInto() throws Exception {
         ExplainTest explainTest = new ExplainTest();
         explainTest.before(connectContext);
-        explainTest.testExplainSelect();
         explainTest.testExplainInsertInto();
+        explainTest.testExplainVerboseInsertInto();
+        explainTest.testExplainSelect();
         explainTest.testExplainVerboseSelect();
         explainTest.testExplainConcatSelect();
         explainTest.testExplainVerboseConcatSelect();
@@ -2088,7 +2089,7 @@ public class QueryPlanTest {
                 "\"storage_medium\" = \"HDD\",\n" +
                 "\"storage_format\" = \"V2\"\n" +
                 ");\n");
-        String queryStr = "EXPLAIN INSERT INTO result_exprs\n" +
+        String queryStr = "EXPLAIN VERBOSE INSERT INTO result_exprs\n" +
                 "SELECT a.aid,\n" +
                 "       b.bid\n" +
                 "FROM\n" +
@@ -2098,7 +2099,7 @@ public class QueryPlanTest {
         String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, queryStr);
         Assert.assertFalse(explainString.contains("OUTPUT EXPRS:3 | 4"));
         System.out.println(explainString);
-        Assert.assertTrue(explainString.contains("OUTPUT EXPRS:`a`.`aid` | 4"));
+        Assert.assertTrue(explainString.contains("OUTPUT EXPRS:CAST(`a`.`aid` AS INT) | 4"));
     }
 
     @Test

