You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/19 16:40:04 UTC
[doris] branch master updated: [fix](nereids) stats calculator lost column statistics on limit node (#14759)
This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a086f67255 [fix](nereids) stats calculator lost column statistics on limit node (#14759)
a086f67255 is described below
commit a086f67255f4d6027a40d66b9f62fbb37c31a008
Author: minghong <mi...@163.com>
AuthorDate: Tue Dec 20 00:39:57 2022 +0800
[fix](nereids) stats calculator lost column statistics on limit node (#14759)
`select avg(id) from (select id from t1 limit 1);`
The above SQL encounters an NPE because the stats derived for the limit node lose the column statistics.
---
.../doris/nereids/stats/StatsCalculator.java | 6 +--
.../apache/doris/statistics/ColumnStatistic.java | 29 +++++++----
.../apache/doris/statistics/StatsDeriveResult.java | 20 +++-----
.../doris/statistics/StatsDeriveResultTest.java | 57 ++++++++++++++++++++++
4 files changed, 86 insertions(+), 26 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 2e8cd7abff..31975280e1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -276,7 +276,7 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
private StatsDeriveResult computeAssertNumRows(long desiredNumOfRows) {
StatsDeriveResult statsDeriveResult = groupExpression.childStatistics(0);
- statsDeriveResult.updateRowCountByLimit(1);
+ statsDeriveResult.updateByLimit(1);
return statsDeriveResult;
}
@@ -315,12 +315,12 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
private StatsDeriveResult computeTopN(TopN topN) {
StatsDeriveResult stats = groupExpression.childStatistics(0);
- return stats.updateRowCountByLimit(topN.getLimit());
+ return stats.updateByLimit(topN.getLimit());
}
private StatsDeriveResult computeLimit(Limit limit) {
StatsDeriveResult stats = groupExpression.childStatistics(0);
- return stats.updateRowCountByLimit(limit.getLimit());
+ return stats.updateByLimit(limit.getLimit());
}
private StatsDeriveResult computeAggregate(Aggregate aggregate) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 4865767496..a0530a7f59 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -90,7 +90,7 @@ public class ColumnStatistic {
public ColumnStatistic(double count, double ndv, double avgSizeByte,
double numNulls, double dataSize, double minValue, double maxValue,
double selectivity, LiteralExpr minExpr,
- LiteralExpr maxExpr, boolean isNaN) {
+ LiteralExpr maxExpr, boolean isUnKnown) {
this.count = count;
this.ndv = ndv;
this.avgSizeByte = avgSizeByte;
@@ -101,7 +101,7 @@ public class ColumnStatistic {
this.selectivity = selectivity;
this.minExpr = minExpr;
this.maxExpr = maxExpr;
- this.isUnKnown = isNaN;
+ this.isUnKnown = isUnKnown;
}
// TODO: use thrift
@@ -158,18 +158,29 @@ public class ColumnStatistic {
.setSelectivity(selectivity).setIsUnknown(isUnKnown).build();
}
- public ColumnStatistic multiply(double d) {
+ public ColumnStatistic updateByLimit(long limit, double rowCount) {
+ double ratio = 0;
+ if (rowCount != 0) {
+ ratio = limit / rowCount;
+ }
+ double newNdv = Math.ceil(Math.min(ndv, limit));
+ double newSelectivity = selectivity;
+ if (newNdv != 0) {
+ newSelectivity = newSelectivity * newNdv / ndv;
+ } else {
+ newSelectivity = 0;
+ }
return new ColumnStatisticBuilder()
- .setCount(Math.ceil(count * d))
- .setNdv(Math.ceil(ndv * d))
- .setAvgSizeByte(Math.ceil(avgSizeByte * d))
- .setNumNulls(Math.ceil(numNulls * d))
- .setDataSize(Math.ceil(dataSize * d))
+ .setCount(Math.ceil(limit))
+ .setNdv(newNdv)
+ .setAvgSizeByte(Math.ceil(avgSizeByte))
+ .setNumNulls(Math.ceil(numNulls * ratio))
+ .setDataSize(Math.ceil(dataSize * ratio))
.setMinValue(minValue)
.setMaxValue(maxValue)
.setMinExpr(minExpr)
.setMaxExpr(maxExpr)
- .setSelectivity(selectivity)
+ .setSelectivity(newSelectivity)
.setIsUnknown(isUnKnown)
.build();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
index 84c21c7b9d..f6a124b81d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
@@ -20,6 +20,8 @@ package org.apache.doris.statistics;
import org.apache.doris.common.Id;
import org.apache.doris.nereids.trees.expressions.Slot;
+import com.google.common.base.Preconditions;
+
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -122,14 +124,12 @@ public class StatsDeriveResult {
return statsDeriveResult;
}
- public StatsDeriveResult updateRowCountByLimit(long limit) {
+ public StatsDeriveResult updateByLimit(long limit) {
+ Preconditions.checkArgument(limit >= 0);
+ limit = Math.min(limit, (long) rowCount);
StatsDeriveResult statsDeriveResult = new StatsDeriveResult(limit, width, penalty);
- double selectivity = 1.0;
- if (limit > 0 && rowCount > 0 && rowCount > limit) {
- selectivity = ((double) limit) / rowCount;
- }
for (Entry<Id, ColumnStatistic> entry : slotIdToColumnStats.entrySet()) {
- statsDeriveResult.addColumnStats(entry.getKey(), entry.getValue().multiply(selectivity));
+ statsDeriveResult.addColumnStats(entry.getKey(), entry.getValue().updateByLimit(limit, rowCount));
}
return statsDeriveResult;
}
@@ -162,14 +162,6 @@ public class StatsDeriveResult {
}
}
- public StatsDeriveResult updateRowCountOnCopy(double selectivity) {
- StatsDeriveResult copy = new StatsDeriveResult(rowCount * selectivity, width, penalty);
- for (Entry<Id, ColumnStatistic> entry : slotIdToColumnStats.entrySet()) {
- copy.addColumnStats(entry.getKey(), entry.getValue().multiply(selectivity));
- }
- return copy;
- }
-
public StatsDeriveResult updateRowCount(double rowCount) {
return new StatsDeriveResult(rowCount, width, penalty, slotIdToColumnStats);
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java
new file mode 100644
index 0000000000..0f2cebf511
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.common.Id;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class StatsDeriveResultTest {
+ @Test
+ public void testUpdateRowCountByLimit() {
+ StatsDeriveResult stats = new StatsDeriveResult(100);
+ ColumnStatistic a = new ColumnStatistic(100, 10, 1, 5, 10,
+ 1, 100, 0.5, null, null, false);
+ Id id = new Id(1);
+ stats.addColumnStats(id, a);
+ StatsDeriveResult res = stats.updateByLimit(0);
+ Assertions.assertEquals(0, res.getRowCount());
+ Assertions.assertEquals(1, res.getSlotIdToColumnStats().size());
+ ColumnStatistic resColStats = res.getColumnStatsBySlotId(id);
+ Assertions.assertEquals(0, resColStats.ndv);
+ Assertions.assertEquals(1, resColStats.avgSizeByte);
+ Assertions.assertEquals(0, resColStats.numNulls);
+ Assertions.assertEquals(0, resColStats.dataSize);
+ Assertions.assertEquals(1, resColStats.minValue);
+ Assertions.assertEquals(100, resColStats.maxValue);
+ Assertions.assertEquals(0, resColStats.selectivity);
+ Assertions.assertEquals(false, resColStats.isUnKnown);
+
+ res = stats.updateByLimit(1);
+ resColStats = res.getColumnStatsBySlotId(id);
+ Assertions.assertEquals(1, resColStats.ndv);
+ Assertions.assertEquals(1, resColStats.avgSizeByte);
+ Assertions.assertEquals(1, resColStats.numNulls);
+ Assertions.assertEquals(1, resColStats.dataSize);
+ Assertions.assertEquals(1, resColStats.minValue);
+ Assertions.assertEquals(100, resColStats.maxValue);
+ Assertions.assertEquals(0.05, resColStats.selectivity);
+ Assertions.assertEquals(false, resColStats.isUnKnown);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org