You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ga...@apache.org on 2023/04/17 02:50:19 UTC
[doris] branch master updated: [enhancement](Nereids) optimize bloom filter size reducing strategy (#18596)
This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3eac53f75d [enhancement](Nereids) optimize bloom filter size reducing strategy (#18596)
3eac53f75d is described below
commit 3eac53f75d5f3eb05e958403efeb7578ad86e438
Author: morrySnow <10...@users.noreply.github.com>
AuthorDate: Mon Apr 17 10:50:08 2023 +0800
[enhancement](Nereids) optimize bloom filter size reducing strategy (#18596)
---
.../glue/translator/RuntimeFilterTranslator.java | 12 +----------
.../processor/post/RuntimeFilterGenerator.java | 25 +++++++++++++++++++---
.../apache/doris/nereids/stats/StatsMathUtil.java | 22 +++++++++++++++++++
.../trees/plans/physical/RuntimeFilter.java | 22 ++++++++++++-------
.../java/org/apache/doris/planner/PlanNode.java | 8 +++++++
.../org/apache/doris/planner/RuntimeFilter.java | 18 ++++++++++------
.../doris/planner/RuntimeFilterGenerator.java | 11 ----------
7 files changed, 78 insertions(+), 40 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
index 6138596620..8816c9856b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/RuntimeFilterTranslator.java
@@ -34,7 +34,6 @@ import org.apache.doris.planner.HashJoinNode;
import org.apache.doris.planner.HashJoinNode.DistributionMode;
import org.apache.doris.planner.JoinNodeBase;
import org.apache.doris.planner.RuntimeFilter.RuntimeFilterTarget;
-import org.apache.doris.planner.RuntimeFilterGenerator.FilterSizeLimits;
import org.apache.doris.planner.ScanNode;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.thrift.TRuntimeFilterType;
@@ -126,20 +125,11 @@ public class RuntimeFilterTranslator {
if (!src.getType().equals(target.getType()) && filter.getType() != TRuntimeFilterType.BITMAP) {
targetExpr = new CastExpr(src.getType(), targetExpr);
}
- FilterSizeLimits filterSizeLimits = context.getLimits();
- if (node instanceof HashJoinNode
- && !(((HashJoinNode) node).getDistributionMode() == DistributionMode.BROADCAST)
- && ConnectContext.get() != null
- && ConnectContext.get().getSessionVariable().enablePipelineEngine()
- && ConnectContext.get().getSessionVariable().getParallelExecInstanceNum() > 0) {
- filterSizeLimits = filterSizeLimits.adjustForParallel(
- ConnectContext.get().getSessionVariable().getParallelExecInstanceNum());
- }
org.apache.doris.planner.RuntimeFilter origFilter
= org.apache.doris.planner.RuntimeFilter.fromNereidsRuntimeFilter(
filter.getId(), node, src, filter.getExprOrder(), targetExpr,
ImmutableMap.of(targetTupleId, ImmutableList.of(targetSlotId)),
- filter.getType(), filterSizeLimits);
+ filter.getType(), context.getLimits(), filter.getBuildSideNdv());
if (node instanceof HashJoinNode) {
origFilter.setIsBroadcast(((HashJoinNode) node).getDistributionMode() == DistributionMode.BROADCAST);
} else {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java
index ba239a2e0d..77d3c92046 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterGenerator.java
@@ -20,6 +20,7 @@ package org.apache.doris.nereids.processor.post;
import org.apache.doris.common.IdGenerator;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.CascadesContext;
+import org.apache.doris.nereids.stats.StatsMathUtil;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
@@ -27,6 +28,7 @@ import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapContains;
+import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.ObjectId;
import org.apache.doris.nereids.trees.plans.Plan;
@@ -46,6 +48,7 @@ import com.google.common.collect.ImmutableSet;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
@@ -111,8 +114,24 @@ public class RuntimeFilterGenerator extends PlanPostProcessor {
continue;
}
Slot olapScanSlot = aliasTransferMap.get(unwrappedSlot).second;
+ long buildSideNdv = -1L;
+ AbstractPlan right = (AbstractPlan) join.right();
+ if (right.getStats() != null) {
+ List<Double> ndvs = join.getHashJoinConjuncts().stream()
+ .map(Expression::getInputSlots)
+ .flatMap(Set::stream)
+ .filter(s -> right.getOutputExprIdSet().contains(s.getExprId()))
+ .map(s -> right.getStats().columnStatistics().get(s))
+ .filter(Objects::nonNull)
+ .map(cs -> cs.ndv)
+ .collect(Collectors.toList());
+ buildSideNdv = (long) StatsMathUtil.jointNdv(ndvs);
+ if (buildSideNdv <= 0 || buildSideNdv > right.getStats().getRowCount()) {
+ buildSideNdv = (long) right.getStats().getRowCount();
+ }
+ }
RuntimeFilter filter = new RuntimeFilter(generator.getNextId(),
- equalTo.right(), olapScanSlot, type, i, join);
+ equalTo.right(), olapScanSlot, type, i, join, buildSideNdv);
ctx.addJoinToTargetMap(join, olapScanSlot.getExprId());
ctx.setTargetExprIdToFilter(olapScanSlot.getExprId(), filter);
ctx.setTargetsOnScanNode(aliasTransferMap.get(unwrappedSlot).first, olapScanSlot);
@@ -150,7 +169,7 @@ public class RuntimeFilterGenerator extends PlanPostProcessor {
for (int i = 0; i < bitmapRFCount; i++) {
Expression bitmapRuntimeFilterCondition = bitmapRuntimeFilterConditions.get(i);
boolean isNot = bitmapRuntimeFilterCondition instanceof Not;
- BitmapContains bitmapContains = null;
+ BitmapContains bitmapContains;
if (bitmapRuntimeFilterCondition instanceof Not) {
bitmapContains = (BitmapContains) bitmapRuntimeFilterCondition.child(0);
} else {
@@ -163,7 +182,7 @@ public class RuntimeFilterGenerator extends PlanPostProcessor {
Slot olapScanSlot = aliasTransferMap.get(targetSlot).second;
RuntimeFilter filter = new RuntimeFilter(generator.getNextId(),
bitmapContains.child(0), olapScanSlot,
- bitmapContains.child(1), type, i, join, isNot);
+ bitmapContains.child(1), type, i, join, isNot, -1L);
ctx.addJoinToTargetMap(join, olapScanSlot.getExprId());
ctx.setTargetExprIdToFilter(olapScanSlot.getExprId(), filter);
ctx.setTargetsOnScanNode(aliasTransferMap.get(targetSlot).first, olapScanSlot);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsMathUtil.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsMathUtil.java
index d70893ab96..24405c2c43 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsMathUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsMathUtil.java
@@ -17,6 +17,11 @@
package org.apache.doris.nereids.stats;
+import org.apache.commons.collections.CollectionUtils;
+
+import java.util.Collections;
+import java.util.List;
+
/**
* Math util for statistics derivation
*/
@@ -59,4 +64,21 @@ public class StatsMathUtil {
return a / nonZeroDivisor(b);
}
+ /**
+ * compute the multi columns unite ndv
+ */
+ public static double jointNdv(List<Double> ndvs) {
+ if (CollectionUtils.isEmpty(ndvs)) {
+ return -1;
+ }
+ if (ndvs.stream().anyMatch(n -> n <= 0)) {
+ return -1;
+ }
+ ndvs.sort(Collections.reverseOrder());
+ double multiNdv = 1;
+ for (int i = 0; i < ndvs.size(); i++) {
+ multiNdv = multiNdv * Math.pow(ndvs.get(i), 1 / Math.pow(2, i));
+ }
+ return multiNdv;
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/RuntimeFilter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/RuntimeFilter.java
index 239b9d6737..12a545a22f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/RuntimeFilter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/RuntimeFilter.java
@@ -32,27 +32,29 @@ public class RuntimeFilter {
private final Expression srcSlot;
//bitmap filter support target expression like k1+1, abs(k1)
//targetExpression is an expression on targetSlot, in which there is only one non-const slot
- private Expression targetExpression;
- private Slot targetSlot;
+ private final Expression targetExpression;
+ private final Slot targetSlot;
private final int exprOrder;
- private AbstractPhysicalJoin builderNode;
+ private final AbstractPhysicalJoin builderNode;
- private boolean bitmapFilterNotIn;
+ private final boolean bitmapFilterNotIn;
+
+ private final long buildSideNdv;
/**
* constructor
*/
public RuntimeFilter(RuntimeFilterId id, Expression src, Slot target, TRuntimeFilterType type,
- int exprOrder, AbstractPhysicalJoin builderNode) {
- this(id, src, target, target, type, exprOrder, builderNode, false);
+ int exprOrder, AbstractPhysicalJoin builderNode, long buildSideNdv) {
+ this(id, src, target, target, type, exprOrder, builderNode, false, buildSideNdv);
}
/**
* constructor
*/
public RuntimeFilter(RuntimeFilterId id, Expression src, Slot target, Expression targetExpression,
- TRuntimeFilterType type,
- int exprOrder, AbstractPhysicalJoin builderNode, boolean bitmapFilterNotIn) {
+ TRuntimeFilterType type, int exprOrder, AbstractPhysicalJoin builderNode, boolean bitmapFilterNotIn,
+ long buildSideNdv) {
this.id = id;
this.srcSlot = src;
this.targetSlot = target;
@@ -61,6 +63,7 @@ public class RuntimeFilter {
this.exprOrder = exprOrder;
this.builderNode = builderNode;
this.bitmapFilterNotIn = bitmapFilterNotIn;
+ this.buildSideNdv = buildSideNdv <= 0 ? -1L : buildSideNdv;
}
public Expression getSrcExpr() {
@@ -95,4 +98,7 @@ public class RuntimeFilter {
return targetExpression;
}
+ public long getBuildSideNdv() {
+ return buildSideNdv;
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
index 4e3412b760..7ab8963de3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
@@ -317,6 +317,14 @@ public abstract class PlanNode extends TreeNode<PlanNode> implements PlanStats {
return cardinality;
}
+ public long getCardinalityAfterFilter() {
+ if (cardinalityAfterFilter < 0) {
+ return cardinality;
+ } else {
+ return cardinalityAfterFilter;
+ }
+ }
+
public int getNumNodes() {
return numNodes;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java
index 58d0086ed6..2a1f13b9c1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilter.java
@@ -134,8 +134,8 @@ public final class RuntimeFilter {
}
private RuntimeFilter(RuntimeFilterId filterId, PlanNode filterSrcNode, Expr srcExpr, int exprOrder,
- Expr origTargetExpr, Map<TupleId, List<SlotId>> targetSlots,
- TRuntimeFilterType type, RuntimeFilterGenerator.FilterSizeLimits filterSizeLimits) {
+ Expr origTargetExpr, Map<TupleId, List<SlotId>> targetSlots, TRuntimeFilterType type,
+ RuntimeFilterGenerator.FilterSizeLimits filterSizeLimits, long buildSizeNdv) {
this.id = filterId;
this.builderNode = filterSrcNode;
this.srcExpr = srcExpr;
@@ -143,6 +143,7 @@ public final class RuntimeFilter {
this.origTargetExpr = origTargetExpr;
this.targetSlotsByTid = targetSlots;
this.runtimeFilterType = type;
+ this.ndvEstimate = buildSizeNdv;
computeNdvEstimate();
calculateFilterSize(filterSizeLimits);
}
@@ -150,8 +151,9 @@ public final class RuntimeFilter {
// only for nereids planner
public static RuntimeFilter fromNereidsRuntimeFilter(RuntimeFilterId id, JoinNodeBase node, Expr srcExpr,
int exprOrder, Expr origTargetExpr, Map<TupleId, List<SlotId>> targetSlots,
- TRuntimeFilterType type, RuntimeFilterGenerator.FilterSizeLimits filterSizeLimits) {
- return new RuntimeFilter(id, node, srcExpr, exprOrder, origTargetExpr, targetSlots, type, filterSizeLimits);
+ TRuntimeFilterType type, RuntimeFilterGenerator.FilterSizeLimits filterSizeLimits, long buildSizeNdv) {
+ return new RuntimeFilter(id, node, srcExpr, exprOrder, origTargetExpr,
+ targetSlots, type, filterSizeLimits, buildSizeNdv);
}
@Override
@@ -306,7 +308,7 @@ public final class RuntimeFilter {
}
return new RuntimeFilter(idGen.getNextId(), filterSrcNode, srcExpr, exprOrder,
- targetExpr, targetSlots, type, filterSizeLimits);
+ targetExpr, targetSlots, type, filterSizeLimits, -1L);
}
public static RuntimeFilter create(IdGenerator<RuntimeFilterId> idGen, Analyzer analyzer, Expr joinPredicate,
@@ -343,7 +345,7 @@ public final class RuntimeFilter {
RuntimeFilter runtimeFilter =
new RuntimeFilter(idGen.getNextId(), filterSrcNode, srcExpr, exprOrder, targetExpr, targetSlots,
- type, filterSizeLimits);
+ type, filterSizeLimits, -1L);
runtimeFilter.setBitmapFilterNotIn(((BitmapFilterPredicate) joinPredicate).isNotIn());
return runtimeFilter;
}
@@ -515,7 +517,9 @@ public final class RuntimeFilter {
}
public void computeNdvEstimate() {
- ndvEstimate = builderNode.getChild(1).getCardinality();
+ if (ndvEstimate < 0) {
+ ndvEstimate = builderNode.getChild(1).getCardinalityAfterFilter();
+ }
}
public void extractTargetsPosition() {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilterGenerator.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilterGenerator.java
index 2d4e57702c..a88f5d73bf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilterGenerator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RuntimeFilterGenerator.java
@@ -117,17 +117,6 @@ public final class RuntimeFilterGenerator {
defaultValue = Math.max(defaultValue, minVal);
defaultVal = BitUtil.roundUpToPowerOf2(Math.min(defaultValue, maxVal));
}
-
- private FilterSizeLimits(long maxVal, long minVal, long defaultVal) {
- this.maxVal = BitUtil.roundUpToPowerOf2(maxVal);
- this.minVal = BitUtil.roundUpToPowerOf2(minVal);
- defaultVal = Math.max(defaultVal, this.minVal);
- this.defaultVal = BitUtil.roundUpToPowerOf2(Math.min(defaultVal, this.maxVal));
- }
-
- public FilterSizeLimits adjustForParallel(int parallel) {
- return new FilterSizeLimits(maxVal / parallel, minVal / parallel, defaultVal / parallel);
- }
}
// Contains size limits for bloom filters.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org