You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/06/22 14:46:29 UTC
[doris] branch master updated: [feature](Nereids): cost and stats; (#10199)
This is an automated email from the ASF dual-hosted git repository.
lingmiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2bd8b0713a [feature](Nereids): cost and stats; (#10199)
2bd8b0713a is described below
commit 2bd8b0713ad3f6dcd950da994543974e64f00e09
Author: jakevin <30...@users.noreply.github.com>
AuthorDate: Wed Jun 22 22:46:22 2022 +0800
[feature](Nereids): cost and stats; (#10199)
feature: cost and stats;
+ Add cost model data struct.
+ Add operator visitor for calculating cost.
+ Add plan context.
---
.../org/apache/doris/nereids/OperatorVisitor.java | 62 ++++++++++++
.../java/org/apache/doris/nereids/PlanContext.java | 106 +++++++++++++++++++++
.../apache/doris/nereids/cost/CostCalculator.java | 102 ++++++++++++++++++++
.../apache/doris/nereids/cost/CostEstimate.java | 74 ++++++++++++++
.../doris/nereids/operators/AbstractOperator.java | 13 ++-
.../apache/doris/nereids/operators/Operator.java | 4 +
.../doris/nereids/operators/plans/JoinType.java | 4 +
.../doris/nereids/trees/expressions/ExprId.java | 2 +-
.../doris/nereids/trees/expressions/Slot.java | 7 +-
.../trees/plans/PhysicalPlanTranslator.java | 2 +-
.../apache/doris/statistics/StatsDeriveResult.java | 32 ++++++-
11 files changed, 399 insertions(+), 9 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/OperatorVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/OperatorVisitor.java
new file mode 100644
index 0000000000..7b168355ca
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/OperatorVisitor.java
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids;
+
+import org.apache.doris.nereids.operators.Operator;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalAggregation;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalFilter;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalHashJoin;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalOlapScan;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalProject;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalSort;
+
+/**
+ * Base class for the processing of logical and physical operator.
+ *
+ * @param <R> Return type of each visit method.
+ * @param <C> Context type.
+ */
+public abstract class OperatorVisitor<R, C> {
+ /* Operator */
+
+ public abstract R visitOperator(Operator operator, C context);
+
+ public R visitPhysicalAggregation(PhysicalAggregation physicalAggregation, C context) {
+ return null;
+ }
+
+ public R visitPhysicalOlapScan(PhysicalOlapScan physicalOlapScan, C context) {
+ return null;
+ }
+
+ public R visitPhysicalSort(PhysicalSort physicalSort, C context) {
+ return null;
+ }
+
+ public R visitPhysicalHashJoin(PhysicalHashJoin physicalHashJoin, C context) {
+ return null;
+ }
+
+ public R visitPhysicalProject(PhysicalProject physicalProject, C context) {
+ return null;
+ }
+
+ public R visitPhysicalFilter(PhysicalFilter physicalFilter, C context) {
+ return null;
+ }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java
new file mode 100644
index 0000000000..50ead51f0e
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+package org.apache.doris.nereids;
+
+import org.apache.doris.common.Id;
+import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.properties.LogicalProperties;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.statistics.StatsDeriveResult;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+import java.util.List;
+
+/**
+ * Context for plan.
+ * Abstraction for group expressions and stand-alone expressions/DAGs.
+ * A ExpressionHandle is attached to either {@link Plan} or {@link GroupExpression}.
+ * Inspired by GPORCA-CExpressionHandle.
+ */
+public class PlanContext {
+ // array of children's derived stats
+ private final List<StatsDeriveResult> childrenStats = Lists.newArrayList();
+ // statistics of attached plan/gexpr
+ private StatsDeriveResult statistics;
+ // attached plan
+ private Plan plan;
+ // attached group expression
+ private GroupExpression groupExpression;
+
+ public PlanContext(Plan plan) {
+ this.plan = plan;
+ }
+
+ public PlanContext(GroupExpression groupExpression) {
+ this.groupExpression = groupExpression;
+ }
+
+ public Plan getPlan() {
+ return plan;
+ }
+
+ public GroupExpression getGroupExpression() {
+ return groupExpression;
+ }
+
+ public List<StatsDeriveResult> getChildrenStats() {
+ return childrenStats;
+ }
+
+ public StatsDeriveResult getStatistics() {
+ return statistics;
+ }
+
+ public void setStatistics(StatsDeriveResult stats) {
+ this.statistics = stats;
+ }
+
+ public StatsDeriveResult getStatisticsWithCheck() {
+ Preconditions.checkNotNull(statistics);
+ return statistics;
+ }
+
+ public LogicalProperties childLogicalPropertyAt(int index) {
+ return plan.child(index).getLogicalProperties();
+ }
+
+ public List<Slot> getChildOutputSlots(int index) {
+ return childLogicalPropertyAt(index).getOutput();
+ }
+
+ public List<Id> getChildOutputIds(int index) {
+ List<Id> ids = Lists.newArrayList();
+ childLogicalPropertyAt(index).getOutput().forEach(slot -> {
+ ids.add(slot.getId());
+ });
+ return ids;
+ }
+
+ /**
+ * Get child statistics.
+ */
+ public StatsDeriveResult getChildStatistics(int index) {
+ StatsDeriveResult statistics = childrenStats.get(index);
+ Preconditions.checkNotNull(statistics);
+ return statistics;
+ }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java
new file mode 100644
index 0000000000..90cc2f8751
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.cost;
+
+import org.apache.doris.common.Id;
+import org.apache.doris.nereids.OperatorVisitor;
+import org.apache.doris.nereids.PlanContext;
+import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.operators.Operator;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalAggregation;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalHashJoin;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalOlapScan;
+import org.apache.doris.nereids.operators.plans.physical.PhysicalProject;
+import org.apache.doris.statistics.StatsDeriveResult;
+
+import com.google.common.base.Preconditions;
+
+import java.util.List;
+
+/**
+ * Calculate the cost of a plan.
+ * Inspired by Presto.
+ */
+public class CostCalculator {
+ /**
+ * Constructor.
+ */
+ public double calculateCost(GroupExpression groupExpression) {
+ PlanContext planContext = new PlanContext(groupExpression);
+ CostEstimator costCalculator = new CostEstimator();
+ CostEstimate costEstimate = groupExpression.getOperator().accept(costCalculator, planContext);
+ return costFormula(costEstimate);
+ }
+
+ private double costFormula(CostEstimate costEstimate) {
+ double cpuCostWeight = 1;
+ double memoryCostWeight = 1;
+ double networkCostWeight = 1;
+ return costEstimate.getCpuCost() * cpuCostWeight + costEstimate.getMemoryCost() * memoryCostWeight
+ + costEstimate.getNetworkCost() * networkCostWeight;
+ }
+
+ private static class CostEstimator extends OperatorVisitor<CostEstimate, PlanContext> {
+ @Override
+ public CostEstimate visitOperator(Operator operator, PlanContext context) {
+ return CostEstimate.zero();
+ }
+
+ @Override
+ public CostEstimate visitPhysicalAggregation(PhysicalAggregation physicalAggregation, PlanContext context) {
+ StatsDeriveResult statistics = context.getStatisticsWithCheck();
+ return CostEstimate.ofCpu(statistics.computeSize());
+ }
+
+ @Override
+ public CostEstimate visitPhysicalOlapScan(PhysicalOlapScan physicalOlapScan, PlanContext context) {
+ StatsDeriveResult statistics = context.getStatisticsWithCheck();
+ return CostEstimate.ofCpu(statistics.computeSize());
+ }
+
+ @Override
+ public CostEstimate visitPhysicalHashJoin(PhysicalHashJoin physicalHashJoin, PlanContext context) {
+ Preconditions.checkState(context.getGroupExpression().arity() == 2);
+ Preconditions.checkState(context.getChildrenStats().size() == 2);
+
+ StatsDeriveResult leftStatistics = context.getChildStatistics(0);
+ StatsDeriveResult rightStatistics = context.getChildStatistics(1);
+
+ // TODO: handle some case
+
+ List<Id> leftIds = context.getChildOutputIds(0);
+ List<Id> rightIds = context.getChildOutputIds(1);
+
+ // handle cross join, onClause is empty .....
+
+ return new CostEstimate(
+ leftStatistics.computeColumnSize(leftIds) + rightStatistics.computeColumnSize(rightIds),
+ rightStatistics.computeColumnSize(rightIds), 0);
+ }
+
+ @Override
+ public CostEstimate visitPhysicalProject(PhysicalProject physicalProject, PlanContext context) {
+ StatsDeriveResult statistics = context.getStatisticsWithCheck();
+ return CostEstimate.ofCpu(statistics.computeSize());
+ }
+ }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java
new file mode 100644
index 0000000000..af2002e4cb
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.cost;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Use for estimating the cost of plan.
+ */
+public final class CostEstimate {
+ private static final CostEstimate INFINITE =
+ new CostEstimate(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY);
+ private static final CostEstimate ZERO = new CostEstimate(0, 0, 0);
+
+ private final double cpuCost;
+ private final double memoryCost;
+ private final double networkCost;
+
+ /**
+ * Constructor of CostEstimate.
+ */
+ public CostEstimate(double cpuCost, double memoryCost, double networkCost) {
+ Preconditions.checkArgument(!(cpuCost < 0), "cpuCost cannot be negative: %s", cpuCost);
+ Preconditions.checkArgument(!(memoryCost < 0), "memoryCost cannot be negative: %s", memoryCost);
+ Preconditions.checkArgument(!(networkCost < 0), "networkCost cannot be negative: %s", networkCost);
+ this.cpuCost = cpuCost;
+ this.memoryCost = memoryCost;
+ this.networkCost = networkCost;
+ }
+
+ public static CostEstimate infinite() {
+ return INFINITE;
+ }
+
+ public static CostEstimate zero() {
+ return ZERO;
+ }
+
+ public double getCpuCost() {
+ return cpuCost;
+ }
+
+ public double getMemoryCost() {
+ return memoryCost;
+ }
+
+ public double getNetworkCost() {
+ return networkCost;
+ }
+
+ public static CostEstimate ofCpu(double cpuCost) {
+ return new CostEstimate(cpuCost, 0, 0);
+ }
+
+ public static CostEstimate ofMemory(double memoryCost) {
+ return new CostEstimate(0, memoryCost, 0);
+ }
+
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/AbstractOperator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/AbstractOperator.java
index ca1b5f3ed7..d3b7a25e92 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/AbstractOperator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/AbstractOperator.java
@@ -17,6 +17,7 @@
package org.apache.doris.nereids.operators;
+import org.apache.doris.nereids.OperatorVisitor;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.PlanOperatorVisitor;
@@ -48,14 +49,22 @@ public abstract class AbstractOperator implements Operator {
* Child operator should overwrite this method.
* for example:
* <code>
- * visitor.visitPhysicalOlapScanPlan(
- * (PhysicalPlan<? extends PhysicalPlan, PhysicalOlapScan>) plan, context);
+ * visitor.visitPhysicalOlapScanPlan(
+ * (PhysicalPlan<? extends PhysicalPlan, PhysicalOlapScan>) plan, context);
* </code>
*/
public <R, C> R accept(PlanOperatorVisitor<R, C> visitor, Plan plan, C context) {
return null;
}
+ public <R, C> R accept(OperatorVisitor<R, C> visitor, C context) {
+ return visitor.visitOperator(this, context);
+ }
+
+ public <R, C> R accept(OperatorVisitor<R, C> visitor, Operator operator, C context) {
+ return null;
+ }
+
public long getLimited() {
return limited;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/Operator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/Operator.java
index 914a1720b4..78524674af 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/Operator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/Operator.java
@@ -17,6 +17,7 @@
package org.apache.doris.nereids.operators;
+import org.apache.doris.nereids.OperatorVisitor;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.trees.TreeNode;
import org.apache.doris.nereids.trees.plans.Plan;
@@ -32,4 +33,7 @@ public interface Operator {
<R, C> R accept(PlanOperatorVisitor<R, C> visitor, Plan plan, C context);
+ <R, C> R accept(OperatorVisitor<R, C> visitor, Operator operator, C context);
+
+ <R, C> R accept(OperatorVisitor<R, C> visitor, C context);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/plans/JoinType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/plans/JoinType.java
index 9e2b8d41e3..c4b4bcfd31 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/plans/JoinType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/plans/JoinType.java
@@ -84,6 +84,10 @@ public enum JoinType {
}
}
+ public final boolean isCrossJoin() {
+ return this == CROSS_JOIN;
+ }
+
public final boolean isInnerOrOuterOrCrossJoin() {
return this == INNER_JOIN || this == CROSS_JOIN || this == FULL_OUTER_JOIN;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ExprId.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ExprId.java
index c900ea42a7..6ed30f678f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ExprId.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ExprId.java
@@ -44,7 +44,7 @@ public class ExprId extends Id<ExprId> {
}
/**
- * Should be only called by {@link org.apache.doris.nereids.trees.expressions.NamedExpressionUtil}.
+ * Should be only called by {@link org.apache.doris.nereids.trees.expressions.NamedExpressionUtil}.
*/
public static IdGenerator<ExprId> createGenerator() {
return new IdGenerator<ExprId>() {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Slot.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Slot.java
index c235dd19e7..78f1ba3491 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Slot.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Slot.java
@@ -23,10 +23,9 @@ import org.apache.doris.nereids.trees.NodeType;
* Abstract class for all slot in expression.
*/
public abstract class Slot extends NamedExpression implements LeafExpression {
+ private ExprId id;
- private int id;
-
- public Slot(NodeType type, int id, Expression... children) {
+ public Slot(NodeType type, ExprId id, Expression... children) {
super(type, children);
this.id = id;
}
@@ -40,7 +39,7 @@ public abstract class Slot extends NamedExpression implements LeafExpression {
return this;
}
- public int getId() {
+ public ExprId getId() {
return id;
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PhysicalPlanTranslator.java
index 9b9e3c5798..4544ea7bd7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PhysicalPlanTranslator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PhysicalPlanTranslator.java
@@ -277,7 +277,7 @@ public class PhysicalPlanTranslator extends PlanOperatorVisitor<PlanFragment, Pl
tupleDescriptor.setTable(table);
for (Slot slot : slotList) {
SlotReference slotReference = (SlotReference) slot;
- SlotDescriptor slotDescriptor = context.addSlotDesc(tupleDescriptor, slot.getId());
+ SlotDescriptor slotDescriptor = context.addSlotDesc(tupleDescriptor, slot.getId().asInt());
slotDescriptor.setColumn(slotReference.getColumn());
slotDescriptor.setType(slotReference.getDataType().toCatalogDataType());
slotDescriptor.setIsMaterialized(true);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
index 1888b3dd45..4f2abb5691 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
@@ -21,9 +21,13 @@ import org.apache.doris.common.Id;
import com.google.common.collect.Maps;
+import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
-// This structure is maintained in each operator to store the statistical information results obtained by the operator.
+/**
+ * This structure is maintained in each operator to store the statistical information results obtained by the operator.
+ */
public class StatsDeriveResult {
private long rowCount = -1;
// The data size of the corresponding column in the operator
@@ -39,6 +43,32 @@ public class StatsDeriveResult {
this.columnToNdv.putAll(columnToNdv);
}
+ public float computeSize() {
+ return Math.max(1, columnToDataSize.values().stream().reduce((float) 0, Float::sum)) * rowCount;
+ }
+
+ /**
+ * Compute the data size of all input columns.
+ *
+ * @param slotIds all input columns.
+ * @return sum data size.
+ */
+ public float computeColumnSize(List<Id> slotIds) {
+ float count = 0;
+ boolean exist = false;
+
+ for (Entry<Id, Float> entry : columnToDataSize.entrySet()) {
+ if (slotIds.contains(entry.getKey())) {
+ count += entry.getValue();
+ exist = true;
+ }
+ }
+ if (!exist) {
+ return 1;
+ }
+ return count * rowCount;
+ }
+
public void setRowCount(long rowCount) {
this.rowCount = rowCount;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org