You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/04/29 02:41:19 UTC
[incubator-doris] branch master updated: [feature](statistics) Statistics derivation.Step 1:ScanNode implement… (#8947)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9ef09b8354 [feature](statistics) Statistics derivation.Step 1:ScanNode implement… (#8947)
9ef09b8354 is described below
commit 9ef09b8354cbd499512efb4df4a5e9c8eba1b1af
Author: zhengshiJ <32...@users.noreply.github.com>
AuthorDate: Fri Apr 29 10:41:12 2022 +0800
[feature](statistics) Statistics derivation.Step 1:ScanNode implement… (#8947)
* [feature](statistics) Statistics derivation.Step 1:ScanNode implementation
Co-authored-by: jianghaochen <ji...@meituan.com>
---
.../org/apache/doris/planner/BrokerScanNode.java | 9 +-
.../java/org/apache/doris/planner/EsScanNode.java | 2 +-
.../org/apache/doris/planner/HiveScanNode.java | 2 +-
.../org/apache/doris/planner/IcebergScanNode.java | 2 +-
.../org/apache/doris/planner/LoadScanNode.java | 6 +-
.../org/apache/doris/planner/MysqlScanNode.java | 2 +-
.../org/apache/doris/planner/OdbcScanNode.java | 2 +-
.../org/apache/doris/planner/OlapScanNode.java | 53 +++----
.../java/org/apache/doris/planner/PlanNode.java | 33 +++++
.../java/org/apache/doris/planner/ScanNode.java | 3 +-
.../org/apache/doris/planner/SchemaScanNode.java | 2 +-
.../apache/doris/planner/SingleNodePlanner.java | 1 -
.../java/org/apache/doris/qe/StmtExecutor.java | 8 -
.../apache/doris/statistics/BaseStatsDerive.java | 161 +++++++++++++++++++++
.../org/apache/doris/statistics/DeriveFactory.java | 36 +++++
.../doris/statistics/OlapScanStatsDerive.java | 112 ++++++++++++++
.../org/apache/doris/statistics/Statistics.java | 13 ++
.../apache/doris/statistics/StatisticsManager.java | 4 +
.../apache/doris/statistics/StatsDeriveResult.java | 56 +++++++
.../doris/statistics/StatsRecursiveDerive.java | 56 +++++++
.../org/apache/doris/statistics/TableStats.java | 12 ++
.../org/apache/doris/analysis/ExplainTest.java | 13 +-
.../org/apache/doris/planner/QueryPlanTest.java | 7 +-
23 files changed, 545 insertions(+), 50 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java
index 7338d9bbb0..73aa1fb04d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java
@@ -138,7 +138,14 @@ public class BrokerScanNode extends LoadScanNode {
public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
- super(id, destTupleDesc, planNodeName);
+ super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE);
+ this.fileStatusesList = fileStatusesList;
+ this.filesAdded = filesAdded;
+ }
+
+ public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
+ List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded, NodeType nodeType) {
+ super(id, destTupleDesc, planNodeName, nodeType);
this.fileStatusesList = fileStatusesList;
this.filesAdded = filesAdded;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
index 2ed1a4bde8..96dbdbe934 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
@@ -72,7 +72,7 @@ public class EsScanNode extends ScanNode {
boolean isFinalized = false;
public EsScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
- super(id, desc, planNodeName);
+ super(id, desc, planNodeName, NodeType.ES_SCAN_NODE);
table = (EsTable) (desc.getTable());
esTablePartitions = table.getEsTablePartitions();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java
index 76cf534313..c18533d302 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java
@@ -100,7 +100,7 @@ public class HiveScanNode extends BrokerScanNode {
public HiveScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
- super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded);
+ super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded, NodeType.HIVE_SCAN_NODE);
this.hiveTable = (HiveTable) destTupleDesc.getTable();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java
index 5428c9ee55..5c7dd1fad0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java
@@ -47,7 +47,7 @@ public class IcebergScanNode extends BrokerScanNode {
public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName,
List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
- super(id, desc, planNodeName, fileStatusesList, filesAdded);
+ super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBREG_SCAN_NODE);
icebergTable = (IcebergTable) desc.getTable();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java
index b6dbb4f782..82299e2ea6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java
@@ -54,7 +54,11 @@ public abstract class LoadScanNode extends ScanNode {
protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND;
public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
- super(id, desc, planNodeName);
+ super(id, desc, planNodeName, NodeType.LOAD_SCAN_NODE);
+ }
+
+ public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
+ super(id, desc, planNodeName, nodeType);
}
protected void initAndSetWhereExpr(Expr whereExpr, TupleDescriptor tupleDesc, Analyzer analyzer) throws UserException {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java
index fcc22125c9..9235b1fc75 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java
@@ -56,7 +56,7 @@ public class MysqlScanNode extends ScanNode {
* Constructs node to scan given data files of table 'tbl'.
*/
public MysqlScanNode(PlanNodeId id, TupleDescriptor desc, MysqlTable tbl) {
- super(id, desc, "SCAN MYSQL");
+ super(id, desc, "SCAN MYSQL", NodeType.MYSQL_SCAN_NODE);
tblName = "`" + tbl.getMysqlTableName() + "`";
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java
index 90f989d35c..1f32b9e938 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java
@@ -73,7 +73,7 @@ public class OdbcScanNode extends ScanNode {
* Constructs node to scan given data files of table 'tbl'.
*/
public OdbcScanNode(PlanNodeId id, TupleDescriptor desc, OdbcTable tbl) {
- super(id, desc, "SCAN ODBC");
+ super(id, desc, "SCAN ODBC", NodeType.ODBC_SCAN_NODE);
connectString = tbl.getConnectString();
odbcType = tbl.getOdbcTableType();
tblName = OdbcTable.databaseProperName(odbcType, tbl.getOdbcTableName());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index f74666a18c..6a1fc49b03 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -53,6 +53,7 @@ import org.apache.doris.common.util.Util;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.resource.Tag;
+import org.apache.doris.statistics.StatsRecursiveDerive;
import org.apache.doris.system.Backend;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TNetworkAddress;
@@ -146,7 +147,7 @@ public class OlapScanNode extends ScanNode {
// Constructs node to scan given data files of table 'tbl'.
public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
- super(id, desc, planNodeName);
+ super(id, desc, planNodeName, NodeType.OLAP_SCAN_NODE);
olapTable = (OlapTable) desc.getTable();
}
@@ -346,10 +347,25 @@ public class OlapScanNode extends ScanNode {
* - So only an inaccurate cardinality can be calculated here.
*/
if (analyzer.safeIsEnableJoinReorderBasedCost()) {
+ mockRowCountInStatistic();
computeInaccurateCardinality();
}
}
+    /**
+     * Temporary stand-in for collected statistics.
+     * Resets {@code cardinality}, adds up the base-index row count of every selected partition,
+     * and registers the total as the mocked row count of this node's table in the statistics.
+     * Remove the method after statistics collection is working properly.
+     */
+    public void mockRowCountInStatistic() {
+        final long tableId = desc.getTable().getId();
+        cardinality = 0;
+        for (long partitionId : selectedPartitionIds) {
+            // row count of the partition's base index; the mv index is not chosen yet at this point
+            cardinality += olapTable.getPartition(partitionId).getBaseIndex().getRowCount();
+        }
+        Catalog.getCurrentCatalog()
+                .getStatisticsManager()
+                .getStatistics()
+                .mockTableStatsWithRowCount(tableId, cardinality);
+    }
+
@Override
public void finalize(Analyzer analyzer) throws UserException {
LOG.debug("OlapScanNode get scan range locations. Tuple: {}", desc);
@@ -386,6 +402,12 @@ public class OlapScanNode extends ScanNode {
}
// when node scan has no data, cardinality should be 0 instead of a invalid value after computeStats()
cardinality = cardinality == -1 ? 0 : cardinality;
+
+ // update statsDeriveResult for real statistics
+ // After statistics collection is complete, remove the logic
+ if (analyzer.safeIsEnableJoinReorderBasedCost()) {
+ statsDeriveResult.setRowCount(cardinality);
+ }
}
@Override
@@ -397,30 +419,9 @@ public class OlapScanNode extends ScanNode {
numNodes = numNodes <= 0 ? 1 : numNodes;
}
- /**
- * Calculate inaccurate cardinality.
- * cardinality: the value of cardinality is the sum of rowcount which belongs to selectedPartitionIds
- * The cardinality here is actually inaccurate, it will be greater than the actual value.
- * There are two reasons
- * 1. During the actual execution, not all tablets belonging to the selected partition will be scanned.
- * Some tablets may have been pruned before execution.
- * 2. The base index may eventually be replaced by mv index.
- * <p>
- * There are three steps to calculate cardinality
- * 1. Calculate how many rows were scanned
- * 2. Apply conjunct
- * 3. Apply limit
- */
- private void computeInaccurateCardinality() {
- // step1: Calculate how many rows were scanned
- cardinality = 0;
- for (long selectedPartitionId : selectedPartitionIds) {
- final Partition partition = olapTable.getPartition(selectedPartitionId);
- final MaterializedIndex baseIndex = partition.getBaseIndex();
- cardinality += baseIndex.getRowCount();
- }
- applyConjunctsSelectivity();
- capCardinalityAtLimit();
+ private void computeInaccurateCardinality() throws UserException {
+ StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
+ cardinality = statsDeriveResult.getRowCount();
}
private Collection<Long> partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException {
@@ -563,7 +564,7 @@ public class OlapScanNode extends ScanNode {
result.add(scanRangeLocations);
}
- // FIXME(dhc): we use cardinality here to simulate ndv
+
if (tablets.size() == 0) {
desc.setCardinality(0);
} else {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
index 58a0019962..13ac58c40f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
@@ -36,6 +36,7 @@ import org.apache.doris.common.NotImplementedException;
import org.apache.doris.common.TreeNode;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.VectorizedUtil;
+import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TFunctionBinaryType;
import org.apache.doris.thrift.TPlan;
@@ -135,6 +136,9 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
protected List<SlotId> outputSlotIds;
+ protected NodeType nodeType = NodeType.DEFAULT;
+ protected StatsDeriveResult statsDeriveResult;
+
protected PlanNode(PlanNodeId id, ArrayList<TupleId> tupleIds, String planNodeName) {
this.id = id;
this.limit = -1;
@@ -173,12 +177,41 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
this.planNodeName = VectorizedUtil.isVectorized() ?
"V" + planNodeName : planNodeName;
this.numInstances = 1;
+ this.nodeType = nodeType;
+ }
+
+ public enum NodeType {
+ DEFAULT,
+ AGG_NODE,
+ BROKER_SCAN_NODE,
+ HASH_JOIN_NODE,
+ HIVE_SCAN_NODE,
+ MERGE_NODE,
+ ES_SCAN_NODE,
+ ICEBREG_SCAN_NODE,
+ LOAD_SCAN_NODE,
+ MYSQL_SCAN_NODE,
+ ODBC_SCAN_NODE,
+ OLAP_SCAN_NODE,
+ SCHEMA_SCAN_NODE,
}
public String getPlanNodeName() {
return planNodeName;
}
+ public StatsDeriveResult getStatsDeriveResult() {
+ return statsDeriveResult;
+ }
+
+ public NodeType getNodeType() {
+ return nodeType;
+ }
+
+ public void setStatsDeriveResult(StatsDeriveResult statsDeriveResult) {
+ this.statsDeriveResult = statsDeriveResult;
+ }
+
/**
* Sets tblRefIds_, tupleIds_, and nullableTupleIds_.
* The default implementation is a no-op.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
index 8b8c52b5bc..a891f7616f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java
@@ -65,8 +65,9 @@ abstract public class ScanNode extends PlanNode {
protected String sortColumn = null;
protected Analyzer analyzer;
- public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
+ public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) {
super(id, desc.getId().asList(), planNodeName);
+ super.nodeType = nodeType;
this.desc = desc;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java
index c5692c38e8..07152bfd5e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java
@@ -57,7 +57,7 @@ public class SchemaScanNode extends ScanNode {
* Constructs node to scan given data files of table 'tbl'.
*/
public SchemaScanNode(PlanNodeId id, TupleDescriptor desc) {
- super(id, desc, "SCAN SCHEMA");
+ super(id, desc, "SCAN SCHEMA", NodeType.SCHEMA_SCAN_NODE);
this.tableName = desc.getTable().getName();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index 8ad921bf09..a78435a3dd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -1725,7 +1725,6 @@ public class SingleNodePlanner {
scanNodeList.add(scanNode);
scanNode.init(analyzer);
-
return scanNode;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index e913f9cc96..7195591a29 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -699,14 +699,6 @@ public class StmtExecutor implements ProfileWriter {
}
if (explainOptions != null) parsedStmt.setIsExplain(explainOptions);
}
-
- if (parsedStmt instanceof InsertStmt && parsedStmt.isExplain()) {
- if (ConnectContext.get() != null &&
- ConnectContext.get().getExecutor() != null &&
- ConnectContext.get().getExecutor().getParsedStmt() != null) {
- ConnectContext.get().getExecutor().getParsedStmt().setIsExplain(new ExplainOptions(true, false));
- }
- }
}
plannerProfile.setQueryAnalysisFinishTime();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java
new file mode 100644
index 0000000000..ccb58e9287
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java
@@ -0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.doris.analysis.Expr;
+import org.apache.doris.analysis.SlotId;
+import org.apache.doris.common.UserException;
+import org.apache.doris.planner.PlanNode;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Default statistics derivation for a plan node.
+ *
+ * <p>{@link #init(PlanNode)} captures the node's limit, its conjuncts and the statistics that
+ * were already derived for its children; {@link #deriveStats()} then combines them into a
+ * {@link StatsDeriveResult}. Subclasses may override the derive methods with node-specific logic.
+ */
+public class BaseStatsDerive {
+    private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class);
+    // estimate of the output rowCount of this node;
+    // invalid: -1
+    protected long rowCount = -1;
+    // limit copied from the plan node in init(); a value of -1 or less means "no limit"
+    protected long limit = -1;
+
+    protected List<Expr> conjuncts = Lists.newArrayList();
+    protected List<StatsDeriveResult> childrenStatsResult = Lists.newArrayList();
+
+    /**
+     * Collects the inputs of the derivation from {@code node}: its limit, its conjuncts and the
+     * {@link StatsDeriveResult} of every child.
+     *
+     * @param node the plan node whose statistics are about to be derived
+     * @throws UserException if a child of {@code node} has no derived statistics yet
+     */
+    protected void init(PlanNode node) throws UserException {
+        limit = node.getLimit();
+        conjuncts.addAll(node.getConjuncts());
+
+        for (PlanNode childNode : node.getChildren()) {
+            StatsDeriveResult result = childNode.getStatsDeriveResult();
+            if (result == null) {
+                // keep the two node types separated so the message stays readable
+                throw new UserException("childNode statsDeriveResult is null, childNodeType is "
+                        + childNode.getNodeType() + ", parentNodeType is " + node.getNodeType());
+            }
+            childrenStatsResult.add(result);
+        }
+    }
+
+    /**
+     * Derives row count, per-column data size and per-column ndv for the node passed to
+     * {@link #init(PlanNode)}.
+     *
+     * @return a new result holding the three derived values
+     */
+    public StatsDeriveResult deriveStats() {
+        return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
+    }
+
+    /** Returns true when a limit was set, i.e. {@code limit} is greater than -1. */
+    public boolean hasLimit() {
+        return limit > -1;
+    }
+
+    /** Scales {@code rowCount} by the combined conjunct selectivity; a no-op while rowCount is invalid (-1). */
+    protected void applyConjunctsSelectivity() {
+        if (rowCount == -1) {
+            return;
+        }
+        applySelectivity();
+    }
+
+    private void applySelectivity() {
+        double selectivity = computeSelectivity();
+        Preconditions.checkState(rowCount >= 0);
+        long preConjunctRowCount = rowCount;
+        rowCount = Math.round(rowCount * selectivity);
+        // don't round rowCount down to zero for safety.
+        if (rowCount == 0 && preConjunctRowCount > 0) {
+            rowCount = 1;
+        }
+    }
+
+    /**
+     * Calls {@code setSelectivity()} on every conjunct and returns their combined selectivity.
+     *
+     * @return the combined selectivity, bounded to [0, 1]
+     */
+    protected double computeSelectivity() {
+        for (Expr expr : conjuncts) {
+            expr.setSelectivity();
+        }
+        return computeCombinedSelectivity(conjuncts);
+    }
+
+    /**
+     * Returns the estimated combined selectivity of all conjuncts. Uses heuristics to
+     * address the following estimation challenges:
+     * 1. The individual selectivities of conjuncts may be unknown.
+     * 2. Two selectivities, whether known or unknown, could be correlated. Assuming
+     * independence can lead to significant underestimation.
+     * <p>
+     * The first issue is addressed by using a single default selectivity that is
+     * representative of all conjuncts with unknown selectivities.
+     * The second issue is addressed by an exponential backoff when multiplying each
+     * additional selectivity into the final result.
+     *
+     * @param conjuncts the predicates whose selectivities are combined
+     * @return the combined selectivity, bounded to [0, 1]
+     */
+    protected double computeCombinedSelectivity(List<Expr> conjuncts) {
+        // Collect all estimated selectivities.
+        List<Double> selectivities = new ArrayList<>();
+        for (Expr e : conjuncts) {
+            if (e.hasSelectivity()) {
+                selectivities.add(e.getSelectivity());
+            }
+        }
+        if (selectivities.size() != conjuncts.size()) {
+            // Some conjuncts have no estimated selectivity. Use a single default
+            // representative selectivity for all those conjuncts.
+            selectivities.add(Expr.DEFAULT_SELECTIVITY);
+        }
+        // Sort the selectivities to get a consistent estimate, regardless of the original
+        // conjunct order. Sort in ascending order such that the most selective conjunct
+        // is fully applied.
+        Collections.sort(selectivities);
+        double result = 1.0;
+        // selectivity = 1 * (s1)^(1/1) * (s2)^(1/2) * ... * (sn-1)^(1/(n-1)) * (sn)^(1/n)
+        for (int i = 0; i < selectivities.size(); ++i) {
+            // Exponential backoff for each selectivity multiplied into the final result.
+            result *= Math.pow(selectivities.get(i), 1.0 / (i + 1));
+        }
+        // Bound result in [0, 1]
+        return Math.max(0.0, Math.min(1.0, result));
+    }
+
+    /** Caps {@code rowCount} at the limit; when rowCount is still invalid (-1) the limit itself is used. */
+    protected void capRowCountAtLimit() {
+        if (hasLimit()) {
+            rowCount = rowCount == -1 ? limit : Math.min(rowCount, limit);
+        }
+    }
+
+    /**
+     * Derives the output row count of the node.
+     * Currently it simply takes the largest row count among the children (never going below the
+     * current {@code rowCount}), then applies the conjunct selectivity and the limit.
+     *
+     * @return the derived row count, or -1 if it could not be estimated
+     */
+    protected long deriveRowCount() {
+        for (StatsDeriveResult statsDeriveResult : childrenStatsResult) {
+            rowCount = Math.max(rowCount, statsDeriveResult.getRowCount());
+        }
+        applyConjunctsSelectivity();
+        capRowCountAtLimit();
+        return rowCount;
+    }
+
+    /**
+     * Merges the per-slot data sizes of all children into one map.
+     * When several children report the same slot, the entry of the later child wins.
+     */
+    protected HashMap<SlotId, Float> deriveColumnToDataSize() {
+        HashMap<SlotId, Float> columnToDataSize = new HashMap<>();
+        for (StatsDeriveResult child : childrenStatsResult) {
+            columnToDataSize.putAll(child.getColumnToDataSize());
+        }
+        return columnToDataSize;
+    }
+
+    /**
+     * Merges the per-slot ndv values of all children into one map.
+     * When several children report the same slot, the entry of the later child wins.
+     */
+    protected HashMap<SlotId, Long> deriveColumnToNdv() {
+        HashMap<SlotId, Long> columnToNdv = new HashMap<>();
+        for (StatsDeriveResult child : childrenStatsResult) {
+            columnToNdv.putAll(child.getColumnToNdv());
+        }
+        return columnToNdv;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java
new file mode 100644
index 0000000000..d663bf5e08
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.planner.PlanNode;
+
+/**
+ * Chooses the statistics derivation implementation that matches a plan node type.
+ */
+public class DeriveFactory {
+
+    /**
+     * Returns a fresh derive object for {@code nodeType}.
+     * Only OLAP scan nodes have a dedicated implementation so far; every other node type
+     * gets the generic {@link BaseStatsDerive}.
+     *
+     * @param nodeType the type of the plan node to derive statistics for
+     * @return a new {@link OlapScanStatsDerive} for OLAP_SCAN_NODE, otherwise a new {@link BaseStatsDerive}
+     */
+    public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) {
+        switch (nodeType) {
+            case OLAP_SCAN_NODE:
+                return new OlapScanStatsDerive();
+            default:
+                return new BaseStatsDerive();
+        }
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
new file mode 100644
index 0000000000..ff514aa55e
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import com.google.common.base.Preconditions;
+import org.apache.doris.analysis.SlotDescriptor;
+import org.apache.doris.analysis.SlotId;
+import org.apache.doris.catalog.Catalog;
+import org.apache.doris.common.Pair;
+import org.apache.doris.common.UserException;
+import org.apache.doris.planner.OlapScanNode;
+import org.apache.doris.planner.PlanNode;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Statistics derivation for an {@link OlapScanNode}.
+ *
+ * <p>The row count is read from the table statistics kept by the statistics manager; per-slot
+ * ndv and data size are placeholders (-1) until column statistics collection is available.
+ */
+public class OlapScanStatsDerive extends BaseStatsDerive {
+    // Currently, due to the structure of doris,
+    // the selected materialized view is not determined when calculating the statistical information of scan,
+    // so baseIndex is used for calculation when generating Planner.
+
+    // The rowCount here is the number of rows.
+    private long inputRowCount = -1;
+    private Map<SlotId, Float> slotIdToDataSize;
+    private Map<SlotId, Long> slotIdToNdv;
+    private Map<SlotId, Pair<Long, String>> slotIdToTableIdAndColumnName;
+
+    /**
+     * Runs the base initialization and then collects the scan-specific inputs.
+     *
+     * @param node the plan node to derive statistics for; must be an {@link OlapScanNode}
+     * @throws UserException if the base initialization fails
+     */
+    @Override
+    public void init(PlanNode node) throws UserException {
+        Preconditions.checkState(node instanceof OlapScanNode);
+        super.init(node);
+        buildStructure((OlapScanNode) node);
+    }
+
+    /**
+     * Derives the scan statistics: the row count starts from the table row count and is then
+     * reduced by conjunct selectivity and limit; ndv and data size are filled in per slot.
+     *
+     * @return a new result holding the derived row count, data sizes and ndv values
+     */
+    @Override
+    public StatsDeriveResult deriveStats() {
+        /*
+         * Compute inaccurate cardinality before mv selector and tablet pruning.
+         * - Accurate statistical information relies on the selector of materialized views and bucket reduction.
+         * - However, both of those processes occur after the reorder algorithm is completed.
+         * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm.
+         * - So only an inaccurate cardinality can be calculated here.
+         */
+        rowCount = inputRowCount;
+        for (Map.Entry<SlotId, Pair<Long, String>> pairEntry : slotIdToTableIdAndColumnName.entrySet()) {
+            Pair<Long, Float> ndvAndDataSize = getNdvAndDataSizeFromStatistics(pairEntry.getValue());
+            long ndv = ndvAndDataSize.first;
+            float dataSize = ndvAndDataSize.second;
+            slotIdToNdv.put(pairEntry.getKey(), ndv);
+            slotIdToDataSize.put(pairEntry.getKey(), dataSize);
+        }
+        return new StatsDeriveResult(deriveRowCount(), slotIdToDataSize, slotIdToNdv);
+    }
+
+    /**
+     * Resets the per-slot maps, reads the table row count from the statistics and records the
+     * (table id, column name) pair of every materialized slot of the scanned tuple.
+     *
+     * @param node the scan node whose tuple descriptor is inspected
+     */
+    public void buildStructure(OlapScanNode node) {
+        slotIdToDataSize = new HashMap<>();
+        slotIdToNdv = new HashMap<>();
+        // Must be instantiated here: the slot loop below and deriveStats() both dereference it.
+        slotIdToTableIdAndColumnName = new HashMap<>();
+        if (node.getTupleDesc() == null) {
+            // nothing to inspect; leave the row count invalid (-1) and the maps empty
+            return;
+        }
+        if (node.getTupleDesc().getTable() != null) {
+            long tableId = node.getTupleDesc().getTable().getId();
+            inputRowCount = Catalog.getCurrentCatalog().getStatisticsManager()
+                    .getStatistics().getTableStats(tableId).getRowCount();
+        }
+        for (SlotDescriptor slot : node.getTupleDesc().getSlots()) {
+            if (!slot.isMaterialized()) {
+                continue;
+            }
+
+            long tableId = slot.getParent().getTable().getId();
+            String columnName = slot.getColumn().getName();
+            slotIdToTableIdAndColumnName.put(slot.getId(), new Pair<>(tableId, columnName));
+        }
+    }
+
+    //TODO:Implement the getStatistics interface
+    //now there is nothing in statistics, need to wait for collection finished
+    /**
+     * Looks up ndv and data size for one column. Until column statistics are collected this
+     * always returns the invalid marker -1 for both values.
+     *
+     * @param pair the (table id, column name) pair identifying the column
+     * @return a pair of (ndv, data size), currently always (-1, -1)
+     */
+    public Pair<Long, Float> getNdvAndDataSizeFromStatistics(Pair<Long, String> pair) {
+        long ndv = -1;
+        float dataSize = -1;
+        /*
+        if (Catalog.getCurrentCatalog()
+                .getStatisticsManager()
+                .getStatistics()
+                .getColumnStats(pair.first) != null) {
+            ndv = Catalog.getCurrentCatalog()
+                    .getStatisticsManager()
+                    .getStatistics()
+                    .getColumnStats(pair.first).get(pair.second).getNdv();
+            dataSize = Catalog.getCurrentCatalog()
+                    .getStatisticsManager()
+                    .getStatistics()
+                    .getColumnStats(pair.first).get(pair.second).getDataSize();
+        }
+        */
+        return new Pair<>(ndv, dataSize);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index b17a37858d..58003de90c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -70,4 +70,17 @@ public class Statistics {
}
return tableStats.getNameToColumnStats();
}
+
+    // TODO: mock statistics need to be removed in the future
+    // Stores rowCount as the row count of the given table, creating the table's
+    // TableStats entry first when none is registered yet.
+    public void mockTableStatsWithRowCount(long tableId, long rowCount) {
+        TableStats stats = idToTableStats.get(tableId);
+        if (stats == null) {
+            stats = new TableStats();
+            idToTableStats.put(tableId, stats);
+        }
+
+        // already up to date: skip the redundant write
+        if (stats.getRowCount() == rowCount) {
+            return;
+        }
+        stats.setRowCount(rowCount);
+    }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java
index e994cce212..b1c88e36a5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java
@@ -136,4 +136,8 @@ public class StatisticsManager {
Table table = db.getTableOrAnalysisException(tableName);
return table;
}
+
+ public Statistics getStatistics() {
+ return statistics;
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
new file mode 100644
index 0000000000..b9b0d8024e
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import com.google.common.collect.Maps;
+import org.apache.doris.analysis.SlotId;
+
+import java.util.Map;
+
+/**
+ * Statistics derivation result kept by each operator: the operator's row count
+ * together with the per-column data size and ndv, both keyed by slot id.
+ */
+public class StatsDeriveResult {
+    private long rowCount = -1;
+    // Data size of each column in the operator; the key is the slot id.
+    private final Map<SlotId, Float> columnToDataSize;
+    // Ndv of each column in the operator; the key is the slot id.
+    private final Map<SlotId, Long> columnToNdv;
+
+    /**
+     * Creates a result holding the given row count and private copies of both maps.
+     *
+     * @param rowCount row count of the operator
+     * @param columnToDataSize data size per slot id; its entries are copied
+     * @param columnToNdv ndv per slot id; its entries are copied
+     */
+    public StatsDeriveResult(long rowCount, Map<SlotId, Float> columnToDataSize, Map<SlotId, Long> columnToNdv) {
+        this.rowCount = rowCount;
+        this.columnToDataSize = Maps.newHashMap(columnToDataSize);
+        this.columnToNdv = Maps.newHashMap(columnToNdv);
+    }
+
+    /** Returns the stored row count. */
+    public long getRowCount() {
+        return this.rowCount;
+    }
+
+    /** Replaces the stored row count. */
+    public void setRowCount(long rowCount) {
+        this.rowCount = rowCount;
+    }
+
+    /** Returns the internal slot-id-to-data-size map (the live map, not a copy). */
+    public Map<SlotId, Float> getColumnToDataSize() {
+        return this.columnToDataSize;
+    }
+
+    /** Returns the internal slot-id-to-ndv map (the live map, not a copy). */
+    public Map<SlotId, Long> getColumnToNdv() {
+        return this.columnToNdv;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java
new file mode 100644
index 0000000000..e6159a594d
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.common.UserException;
+import org.apache.doris.planner.PlanNode;
+
+
+/**
+ * Singleton that fills in the statistics derivation result of a plan node and,
+ * before that, of every node below it.
+ */
+public class StatsRecursiveDerive {
+    private StatsRecursiveDerive() {}
+
+    // Nested holder whose only job is to own the single instance.
+    private static class Holder {
+        private static final StatsRecursiveDerive INSTANCE = new StatsRecursiveDerive();
+    }
+
+    /** Returns the shared instance. */
+    public static StatsRecursiveDerive getStatsRecursiveDerive() {
+        return Holder.INSTANCE;
+    }
+
+    /**
+     * Derives statistics for {@code node} after deriving them for all of its children.
+     * Nodes that already carry a derivation result are left untouched.
+     *
+     * @param node input and output parameter: the derived result is stored on the node itself
+     * @throws UserException propagated from the derivation calls made for this node or its children
+     */
+    public void statsRecursiveDerive(PlanNode node) throws UserException {
+        if (node.getStatsDeriveResult() != null) {
+            return;
+        }
+        // Children first; the recursive call returns immediately for a child
+        // that already has a result.
+        for (PlanNode child : node.getChildren()) {
+            statsRecursiveDerive(child);
+        }
+        BaseStatsDerive derive = new DeriveFactory().getStatsDerive(node.getNodeType());
+        derive.init(node);
+        node.setStatsDeriveResult(derive.deriveStats());
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
index efb35bbe10..ef494bd9f6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
@@ -94,4 +94,16 @@ public class TableStats {
public Map<String, ColumnStats> getNameToColumnStats() {
return nameToColumnStats;
}
+
+    /** Returns the row count stored in this table statistics object. */
+    public long getRowCount() {
+        return this.rowCount;
+    }
+
+    /** Returns the data size stored in this table statistics object. */
+    public long getDataSize() {
+        return this.dataSize;
+    }
+
+ /**
+ * Overwrites the row count stored in this table statistics object.
+ *
+ * @param rowCount the new row count; stored as given, with no validation here
+ */
+ public void setRowCount(long rowCount) {
+ this.rowCount = rowCount;
+ }
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java
index 3fca5643a5..f94e69b142 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java
@@ -72,20 +72,27 @@ public class ExplainTest {
Assert.assertEquals(dropDbStmt.toSql(), dropSchemaStmt.toSql());
}
- public void testExplainSelect() throws Exception {
- String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
+ public void testExplainInsertInto() throws Exception {
+ String sql = "explain insert into test_explain.explain_t1 select * from test_explain.explain_t2";
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
System.out.println(explainString);
Assert.assertFalse(explainString.contains("CAST"));
}
- public void testExplainInsertInto() throws Exception {
+ public void testExplainVerboseInsertInto() throws Exception {
String sql = "explain verbose insert into test_explain.explain_t1 select * from test_explain.explain_t2";
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true);
System.out.println(explainString);
Assert.assertTrue(explainString.contains("CAST"));
}
+ public void testExplainSelect() throws Exception {
+ String sql = "explain select * from test_explain.explain_t1 where dt = '1001';";
+ String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false);
+ System.out.println(explainString);
+ Assert.assertFalse(explainString.contains("CAST"));
+ }
+
public void testExplainVerboseSelect() throws Exception {
String queryStr = "explain verbose select * from test_explain.explain_t1 where dt = '1001';";
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, queryStr, true);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
index ee09be1d8a..62b741b61b 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
@@ -1991,8 +1991,9 @@ public class QueryPlanTest {
public void testExplainInsertInto() throws Exception {
ExplainTest explainTest = new ExplainTest();
explainTest.before(connectContext);
- explainTest.testExplainSelect();
explainTest.testExplainInsertInto();
+ explainTest.testExplainVerboseInsertInto();
+ explainTest.testExplainSelect();
explainTest.testExplainVerboseSelect();
explainTest.testExplainConcatSelect();
explainTest.testExplainVerboseConcatSelect();
@@ -2088,7 +2089,7 @@ public class QueryPlanTest {
"\"storage_medium\" = \"HDD\",\n" +
"\"storage_format\" = \"V2\"\n" +
");\n");
- String queryStr = "EXPLAIN INSERT INTO result_exprs\n" +
+ String queryStr = "EXPLAIN VERBOSE INSERT INTO result_exprs\n" +
"SELECT a.aid,\n" +
" b.bid\n" +
"FROM\n" +
@@ -2098,7 +2099,7 @@ public class QueryPlanTest {
String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, queryStr);
Assert.assertFalse(explainString.contains("OUTPUT EXPRS:3 | 4"));
System.out.println(explainString);
- Assert.assertTrue(explainString.contains("OUTPUT EXPRS:`a`.`aid` | 4"));
+ Assert.assertTrue(explainString.contains("OUTPUT EXPRS:CAST(`a`.`aid` AS INT) | 4"));
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org