You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ja...@apache.org on 2022/10/11 09:23:59 UTC
[doris] branch master updated: [feature](nereids) refactor statistics framework and introduce StatsCalculatorV2 (#12987)
This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3c5e7e2f24 [feature](nereids) refactor statistics framework and introduce StatsCalculatorV2 (#12987)
3c5e7e2f24 is described below
commit 3c5e7e2f24d19350fd5ae1919a108e3f6fb8dd74
Author: Kikyou1997 <33...@users.noreply.github.com>
AuthorDate: Tue Oct 11 17:23:49 2022 +0800
[feature](nereids) refactor statistics framework and introduce StatsCalculatorV2 (#12987)
* squash
change data type of metrics to double
unit test
add stats for some function
add stats for arithmeticExpr
1. set max/min of ColumnStats to double
2. add stats for binaryExpr/compoundExpr
in predicate
* Add LiteralExpr in ColumnStat just for user display only.
---
.../doris/analysis/AlterColumnStatsStmt.java | 14 +-
.../apache/doris/analysis/ShowColumnStatsStmt.java | 14 +-
.../org/apache/doris/analysis/SlotDescriptor.java | 2 +-
.../org/apache/doris/analysis/StringLiteral.java | 2 +-
.../java/org/apache/doris/common/CheckedMath.java | 12 +
.../main/java/org/apache/doris/common/Config.java | 1 +
.../java/org/apache/doris/common/util/Util.java | 6 +-
.../apache/doris/nereids/cost/CostCalculator.java | 2 +-
.../nereids/jobs/cascades/CostAndEnforcerJob.java | 1 -
.../doris/nereids/stats/ExpressionEstimation.java | 210 ++++++++++
.../doris/nereids/stats/FilterEstimation.java | 300 +++++++++++++++
.../nereids/stats/FilterSelectivityCalculator.java | 10 +-
.../apache/doris/nereids/stats/JoinEstimation.java | 36 +-
.../doris/nereids/stats/StatsCalculator.java | 46 ++-
...StatsCalculator.java => StatsCalculatorV2.java} | 81 ++--
.../nereids/trees/expressions/Expression.java | 1 +
.../expressions/functions/scalar/Substring.java | 6 +
.../expressions/functions/scalar/WeekOfYear.java | 5 +
.../trees/expressions/functions/scalar/Year.java | 6 +
.../nereids/trees/expressions/literal/Literal.java | 3 +
.../expressions/visitor/ExpressionVisitor.java | 15 +
.../java/org/apache/doris/nereids/util/Utils.java | 7 +
.../org/apache/doris/planner/AggregationNode.java | 2 +-
.../org/apache/doris/planner/AnalyticEvalNode.java | 2 +-
.../apache/doris/planner/AssertNumRowsNode.java | 2 +-
.../org/apache/doris/planner/CrossJoinNode.java | 2 +-
.../org/apache/doris/planner/EmptySetNode.java | 2 +-
.../org/apache/doris/planner/ExchangeNode.java | 2 +-
.../org/apache/doris/planner/HashJoinNode.java | 4 +-
.../org/apache/doris/planner/JdbcScanNode.java | 2 +-
.../org/apache/doris/planner/MysqlScanNode.java | 2 +-
.../org/apache/doris/planner/OdbcScanNode.java | 2 +-
.../org/apache/doris/planner/OlapScanNode.java | 2 +-
.../java/org/apache/doris/planner/PlanNode.java | 2 +-
.../java/org/apache/doris/planner/RepeatNode.java | 2 +-
.../java/org/apache/doris/planner/SelectNode.java | 2 +-
.../java/org/apache/doris/planner/SortNode.java | 4 +-
.../apache/doris/planner/TableFunctionNode.java | 2 +-
.../java/org/apache/doris/qe/SessionVariable.java | 13 +
.../doris/statistics/AnalyticEvalStatsDerive.java | 2 +-
.../apache/doris/statistics/BaseStatsDerive.java | 4 +-
.../org/apache/doris/statistics/ColumnStat.java | 426 +++++++++++++++++++++
.../org/apache/doris/statistics/ColumnStats.java | 320 ----------------
.../doris/statistics/CrossJoinStatsDerive.java | 6 +-
.../doris/statistics/ExchangeStatsDerive.java | 2 +-
.../doris/statistics/HashJoinStatsDerive.java | 10 +-
.../doris/statistics/OlapScanStatsDerive.java | 2 +-
.../apache/doris/statistics/PartitionStats.java | 26 +-
.../apache/doris/statistics/SelectStatsDerive.java | 2 +-
.../org/apache/doris/statistics/Statistics.java | 8 +-
.../apache/doris/statistics/StatisticsManager.java | 4 +-
.../apache/doris/statistics/StatsDeriveResult.java | 53 ++-
.../doris/statistics/TableFunctionStatsDerive.java | 2 +-
.../org/apache/doris/statistics/TableStats.java | 187 ++++++---
.../{ColumnStatsTest.java => ColumnStatTest.java} | 2 +-
.../org/apache/doris/common/CheckedMathTest.java} | 30 +-
.../nereids/jobs/cascades/DeriveStatsJobTest.java | 10 +-
.../nereids/stats/ExpressionEstimationTest.java | 170 ++++++++
.../doris/nereids/stats/FilterEstimationTest.java | 239 ++++++++++++
.../doris/nereids/stats/StatsCalculatorTest.java | 61 ++-
.../apache/doris/statistics/ColumnStatsTest.java | 75 ++--
.../doris/statistics/PartitionStatsTest.java | 24 +-
.../doris/statistics/StatisticsManagerTest.java | 4 +-
.../apache/doris/statistics/StatisticsTest.java | 30 +-
.../apache/doris/statistics/TableStatsTest.java | 30 +-
65 files changed, 1871 insertions(+), 687 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
index be638a9dad..3ff91ce61f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
@@ -31,7 +31,7 @@ import org.apache.doris.common.util.PrintableMap;
import org.apache.doris.common.util.Util;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.statistics.ColumnStats;
+import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.StatsType;
import com.google.common.collect.ImmutableSet;
@@ -55,12 +55,12 @@ import java.util.Set;
public class AlterColumnStatsStmt extends DdlStmt {
private static final ImmutableSet<StatsType> CONFIGURABLE_PROPERTIES_SET = new ImmutableSet.Builder<StatsType>()
- .add(ColumnStats.NDV)
- .add(ColumnStats.AVG_SIZE)
- .add(ColumnStats.MAX_SIZE)
- .add(ColumnStats.NUM_NULLS)
- .add(ColumnStats.MIN_VALUE)
- .add(ColumnStats.MAX_VALUE)
+ .add(ColumnStat.NDV)
+ .add(ColumnStat.AVG_SIZE)
+ .add(ColumnStat.MAX_SIZE)
+ .add(ColumnStat.NUM_NULLS)
+ .add(ColumnStat.MIN_VALUE)
+ .add(ColumnStat.MAX_VALUE)
.build();
private final TableName tableName;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
index 14a311ba84..eed814989b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java
@@ -22,7 +22,7 @@ import org.apache.doris.catalog.ScalarType;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.Util;
import org.apache.doris.qe.ShowResultSetMetaData;
-import org.apache.doris.statistics.ColumnStats;
+import org.apache.doris.statistics.ColumnStat;
import com.google.common.collect.ImmutableList;
@@ -34,12 +34,12 @@ public class ShowColumnStatsStmt extends ShowStmt {
private static final ImmutableList<String> TITLE_NAMES =
new ImmutableList.Builder<String>()
.add("column_name")
- .add(ColumnStats.NDV.getValue())
- .add(ColumnStats.AVG_SIZE.getValue())
- .add(ColumnStats.MAX_SIZE.getValue())
- .add(ColumnStats.NUM_NULLS.getValue())
- .add(ColumnStats.MIN_VALUE.getValue())
- .add(ColumnStats.MAX_VALUE.getValue())
+ .add(ColumnStat.NDV.getValue())
+ .add(ColumnStat.AVG_SIZE.getValue())
+ .add(ColumnStat.MAX_SIZE.getValue())
+ .add(ColumnStat.NUM_NULLS.getValue())
+ .add(ColumnStat.MIN_VALUE.getValue())
+ .add(ColumnStat.MAX_VALUE.getValue())
.build();
private final TableName tableName;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
index 8541fc4343..2c10a02f76 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
@@ -219,7 +219,7 @@ public class SlotDescriptor {
}
}
// FIXME(dhc): mock ndv
- stats.setNumDistinctValues(parent.getCardinality());
+ stats.setNumDistinctValues((long) parent.getCardinality());
return stats;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StringLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StringLiteral.java
index aa4d37a0ae..4c145dd113 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StringLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StringLiteral.java
@@ -158,7 +158,7 @@ public class StringLiteral extends LiteralExpr {
@Override
public double getDoubleValue() {
- return Double.valueOf(value);
+ return Double.parseDouble(value);
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/CheckedMath.java b/fe/fe-core/src/main/java/org/apache/doris/common/CheckedMath.java
index 02857f42d7..82ee0caaec 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/CheckedMath.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/CheckedMath.java
@@ -21,6 +21,8 @@ import com.google.common.math.LongMath;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import java.math.BigDecimal;
+
public class CheckedMath {
private static final Logger LOG = LogManager.getLogger(CheckedMath.class);
@@ -38,6 +40,16 @@ public class CheckedMath {
}
}
+    /**
+     * Computes and returns the product of two doubles. If the product overflows
+     * towards positive infinity, the maximum double value is returned
+     * (Double.MAX_VALUE).
+     *
+     * <p>Unlike a BigDecimal based implementation this never throws for NaN or
+     * infinite operands: NaN propagates as NaN, and a negative overflow is
+     * returned as negative infinity.
+     *
+     * @param a the first factor
+     * @param b the second factor
+     * @return {@code a * b}, capped at {@link Double#MAX_VALUE}
+     */
+    public static double checkedMultiply(double a, double b) {
+        // IEEE-754 multiplication signals a positive overflow by producing +Infinity,
+        // so no arbitrary-precision arithmetic is required to detect it.
+        double product = a * b;
+        if (product == Double.POSITIVE_INFINITY) {
+            return Double.MAX_VALUE;
+        }
+        return product;
+    }
+
+
/**
* Computes and returns the sum of two longs. If an overflow occurs,
* the maximum Long value is returned (Long.MAX_VALUE).
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
index 87b4a7e390..9b584928f2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
@@ -1787,4 +1787,5 @@ public class Config extends ConfigBase {
@ConfField(mutable = false)
public static int statistic_task_scheduler_execution_interval_ms = 60 * 1000;
+
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
index 40ae041191..751a6de1d1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
@@ -355,15 +355,15 @@ public class Util {
return result;
}
- public static float getFloatPropertyOrDefault(String valStr, float defaultVal, Predicate<Float> pred,
+ public static double getDoublePropertyOrDefault(String valStr, double defaultVal, Predicate<Double> pred,
String hintMsg) throws AnalysisException {
if (Strings.isNullOrEmpty(valStr)) {
return defaultVal;
}
- float result = defaultVal;
+ double result = defaultVal;
try {
- result = Float.valueOf(valStr);
+ result = Double.parseDouble(valStr);
} catch (NumberFormatException e) {
throw new AnalysisException(hintMsg);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java
index 8e533c3dc0..25c40bf2dc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java
@@ -216,7 +216,7 @@ public class CostCalculator {
PhysicalHashJoin<? extends Plan, ? extends Plan> physicalHashJoin) {
StatsDeriveResult outputStats = physicalHashJoin.getGroupExpression().get().getOwnerGroup().getStatistics();
- float size = outputStats.computeSize();
+ double size = outputStats.computeSize();
return CostEstimate.ofCpu(size);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java
index 188acaeba5..73b3c6c405 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java
@@ -190,7 +190,6 @@ public class CostAndEnforcerJob extends Job implements Cloneable {
return;
}
StatsCalculator.estimate(groupExpression);
-
curTotalCost -= curNodeCost;
curNodeCost = CostCalculator.calculateCost(groupExpression);
curTotalCost += curNodeCost;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
new file mode 100644
index 0000000000..188b0a4a07
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java
@@ -0,0 +1,210 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.stats;
+
+import org.apache.doris.nereids.trees.expressions.Add;
+import org.apache.doris.nereids.trees.expressions.BinaryArithmetic;
+import org.apache.doris.nereids.trees.expressions.Divide;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Multiply;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.Subtract;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Avg;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Max;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.WeekOfYear;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.util.Utils;
+import org.apache.doris.statistics.ColumnStat;
+import org.apache.doris.statistics.StatsDeriveResult;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Estimates column statistics for expressions that do not produce a boolean value.
+ */
+public class ExpressionEstimation extends ExpressionVisitor<ColumnStat, StatsDeriveResult> {
+
+ private static ExpressionEstimation INSTANCE = new ExpressionEstimation();
+
+    /**
+     * Estimates the column statistics produced by {@code expression}, using
+     * {@code stats} as the statistics of its input.
+     */
+    public static ColumnStat estimate(Expression expression, StatsDeriveResult stats) {
+        final ExpressionEstimation estimator = INSTANCE;
+        return estimator.visit(expression, stats);
+    }
+
+    /**
+     * Generic entry: dispatches to the visit method matching the concrete type of {@code expr}.
+     */
+    @Override
+    public ColumnStat visit(Expression expr, StatsDeriveResult context) {
+        final ColumnStat estimated = expr.accept(this, context);
+        return estimated;
+    }
+
+    /**
+     * Estimates the statistics of a constant.
+     *
+     * <p>String literals, and any literal whose value cannot be parsed as a double,
+     * yield {@link ColumnStat#UNKNOWN}. A numeric literal is a single non-null
+     * value, so min and max are both that value, ndv is 1 and the null count is 0.
+     */
+    @Override
+    public ColumnStat visitLiteral(Literal literal, StatsDeriveResult context) {
+        if (literal.isStringLiteral()) {
+            return ColumnStat.UNKNOWN;
+        }
+        Object value = literal.getValue();
+        if (value == null) {
+            return ColumnStat.UNKNOWN;
+        }
+        double literalVal;
+        try {
+            literalVal = Double.parseDouble(value.toString());
+        } catch (NumberFormatException ignored) {
+            // The literal has no numeric text form, so it cannot be mapped onto the
+            // double based min/max; report it as unknown instead of failing the planner.
+            return ColumnStat.UNKNOWN;
+        }
+        ColumnStat columnStat = new ColumnStat();
+        columnStat.setMaxValue(literalVal);
+        columnStat.setMinValue(literalVal);
+        columnStat.setNdv(1);
+        // A successfully parsed constant is never null.
+        columnStat.setNumNulls(0);
+        columnStat.setAvgSizeByte(1);
+        return columnStat;
+    }
+
+    /**
+     * Looks up the statistics recorded for the slot in {@code context};
+     * fails with an IllegalStateException when none are present.
+     */
+    @Override
+    public ColumnStat visitSlotReference(SlotReference slotReference, StatsDeriveResult context) {
+        final ColumnStat slotStat = context.getColumnStatsBySlot(slotReference);
+        Preconditions.checkState(slotStat != null);
+        return slotStat;
+    }
+
+    /**
+     * Estimates the statistics of {@code left op right} for +, -, * and /.
+     *
+     * <p>The ndv is the larger of the two operand ndvs, and the size figures are
+     * taken from the left operand. The result is null whenever either operand is
+     * null, so the null count is {@code rowCount * (1 - (1 - pLeft) * (1 - pRight))}
+     * where {@code pLeft}/{@code pRight} are the operands' null fractions, which
+     * treats the two operands as independent. Min and max are derived from the
+     * operand ranges. Any other arithmetic operator yields {@link ColumnStat#UNKNOWN}.
+     */
+    @Override
+    public ColumnStat visitBinaryArithmetic(BinaryArithmetic binaryArithmetic, StatsDeriveResult context) {
+        ColumnStat leftColStats = binaryArithmetic.left().accept(this, context);
+        ColumnStat rightColStats = binaryArithmetic.right().accept(this, context);
+        double ndv = Math.max(leftColStats.getNdv(), rightColStats.getNdv());
+        double rowCount = context.getRowCount();
+        double numNulls = 0;
+        // Guard the division: with no rows there is nothing that could be null.
+        if (rowCount > 0) {
+            double leftNonNullRatio = 1 - leftColStats.getNumNulls() / rowCount;
+            double rightNonNullRatio = 1 - rightColStats.getNumNulls() / rowCount;
+            numNulls = rowCount * (1 - leftNonNullRatio * rightNonNullRatio);
+        }
+        double leftMax = leftColStats.getMaxValue();
+        double rightMax = rightColStats.getMaxValue();
+        double leftMin = leftColStats.getMinValue();
+        double rightMin = rightColStats.getMinValue();
+
+        if (binaryArithmetic instanceof Add) {
+            return new ColumnStat(ndv, leftColStats.getAvgSizeByte(), leftColStats.getMaxSizeByte(),
+                    numNulls, leftMin + rightMin, leftMax + rightMax);
+        }
+        if (binaryArithmetic instanceof Subtract) {
+            return new ColumnStat(ndv, leftColStats.getAvgSizeByte(), leftColStats.getMaxSizeByte(),
+                    numNulls, leftMin - rightMax, leftMax - rightMin);
+        }
+        // TODO: stat for multiply and divide produced by below algorithm may have huge deviation with reality.
+        if (binaryArithmetic instanceof Multiply) {
+            // The extremes of a product over two ranges lie on one of the four corner combinations.
+            double minMin = leftMin * rightMin;
+            double minMax = leftMin * rightMax;
+            double maxMin = leftMax * rightMin;
+            double maxMax = leftMax * rightMax;
+            double min = Math.min(Math.min(Math.min(minMin, minMax), maxMin), maxMax);
+            double max = Math.max(Math.max(Math.max(minMin, minMax), maxMin), maxMax);
+            return new ColumnStat(ndv, leftColStats.getAvgSizeByte(), leftColStats.getMaxSizeByte(),
+                    numNulls, min, max);
+        }
+        if (binaryArithmetic instanceof Divide) {
+            // A zero bound of the divisor range is replaced by 1 to avoid dividing by zero.
+            double divisorMin = noneZeroDivisor(rightMin);
+            double divisorMax = noneZeroDivisor(rightMax);
+            double minMin = leftMin / divisorMin;
+            double minMax = leftMin / divisorMax;
+            double maxMin = leftMax / divisorMin;
+            double maxMax = leftMax / divisorMax;
+            double min = Math.min(Math.min(Math.min(minMin, minMax), maxMin), maxMax);
+            double max = Math.max(Math.max(Math.max(minMin, minMax), maxMin), maxMax);
+            return new ColumnStat(ndv, leftColStats.getAvgSizeByte(), leftColStats.getMaxSizeByte(),
+                    numNulls, min, max);
+        }
+        return ColumnStat.UNKNOWN;
+    }
+
+    /** Returns {@code d}, or 1.0 when {@code d} is zero, so it can safely be used as a divisor. */
+    private double noneZeroDivisor(double d) {
+        if (d == 0.0) {
+            return 1.0;
+        }
+        return d;
+    }
+
+    /**
+     * Statistics of {@code min(child)}: a single value equal to the child's minimum.
+     * Returns {@link ColumnStat#UNKNOWN} when the child's statistics are unknown.
+     */
+    @Override
+    public ColumnStat visitMin(Min min, StatsDeriveResult context) {
+        Expression argument = min.child();
+        ColumnStat argumentStat = argument.accept(this, context);
+        if (argumentStat == ColumnStat.UNKNOWN) {
+            return ColumnStat.UNKNOWN;
+        }
+        double smallest = argumentStat.getMinValue();
+        // NOTE(review): the null count passed here is 1 while visitMax passes 0 - confirm which is intended.
+        return new ColumnStat(1, min.child().getDataType().width(),
+                min.child().getDataType().width(), 1, smallest, smallest);
+    }
+
+    /**
+     * Statistics of {@code max(child)}: a single value equal to the child's maximum.
+     * Returns {@link ColumnStat#UNKNOWN} when the child's statistics are unknown.
+     */
+    @Override
+    public ColumnStat visitMax(Max max, StatsDeriveResult context) {
+        Expression argument = max.child();
+        ColumnStat argumentStat = argument.accept(this, context);
+        if (argumentStat == ColumnStat.UNKNOWN) {
+            return ColumnStat.UNKNOWN;
+        }
+        double largest = argumentStat.getMaxValue();
+        return new ColumnStat(1, max.child().getDataType().width(),
+                max.child().getDataType().width(), 0, largest, largest);
+    }
+
+    /**
+     * Statistics of {@code count(child)}: a single value equal to the input row
+     * count minus the child's null count.
+     * Returns {@link ColumnStat#UNKNOWN} when the child's statistics are unknown.
+     */
+    @Override
+    public ColumnStat visitCount(Count count, StatsDeriveResult context) {
+        ColumnStat argumentStat = count.child(0).accept(this, context);
+        if (argumentStat == ColumnStat.UNKNOWN) {
+            return ColumnStat.UNKNOWN;
+        }
+        double nonNullRows = context.getRowCount() - argumentStat.getNumNulls();
+        return new ColumnStat(1,
+                count.getDataType().width(), count.getDataType().width(), 0, nonNullRows, nonNullRows);
+    }
+
+    // TODO: return a proper estimated stat after supports histogram
+    /** For now the statistics of the summed expression are returned unchanged. */
+    @Override
+    public ColumnStat visitSum(Sum sum, StatsDeriveResult context) {
+        Expression argument = sum.child();
+        return argument.accept(this, context);
+    }
+
+    // TODO: return a proper estimated stat after supports histogram
+    /** For now the statistics of the averaged expression are returned unchanged. */
+    @Override
+    public ColumnStat visitAvg(Avg avg, StatsDeriveResult context) {
+        Expression argument = avg.child();
+        return argument.accept(this, context);
+    }
+
+    /**
+     * Statistics of {@code year(child)}.
+     *
+     * <p>The min and max are the years of the child's min and max values. The number
+     * of distinct years can exceed neither the child's ndv nor the number of years
+     * in that range, and the function adds no nulls of its own, so the child's null
+     * count is carried over. Returns {@link ColumnStat#UNKNOWN} when the child's
+     * statistics are unknown.
+     */
+    @Override
+    public ColumnStat visitYear(Year year, StatsDeriveResult context) {
+        ColumnStat childStat = year.child().accept(this, context);
+        if (childStat == ColumnStat.UNKNOWN) {
+            return ColumnStat.UNKNOWN;
+        }
+        long minYear = Utils.getLocalDatetimeFromLong((long) childStat.getMinValue()).getYear();
+        long maxYear = Utils.getLocalDatetimeFromLong((long) childStat.getMaxValue()).getYear();
+        double yearSpan = maxYear - minYear + 1;
+        double childNdv = childStat.getNdv();
+        // Fall back to the span alone when the child's ndv is not a usable positive number.
+        double ndv = childNdv > 0 ? Math.min(childNdv, yearSpan) : yearSpan;
+        return new ColumnStat(ndv, 4, 4,
+                childStat.getNumNulls(), minYear, maxYear);
+    }
+
+    /**
+     * Statistics of {@code weekofyear(child)}: a fixed range of 1 to 52 with 52
+     * distinct values; only the null count comes from the child.
+     */
+    @Override
+    public ColumnStat visitWeekOfYear(WeekOfYear weekOfYear, StatsDeriveResult context) {
+        ColumnStat argumentStat = weekOfYear.child().accept(this, context);
+        // NOTE(review): ISO week numbering can reach 53 - confirm whether 52 is the intended upper bound.
+        double inheritedNulls = argumentStat.getNumNulls();
+        return new ColumnStat(52, 2, 2, inheritedNulls, 1, 52);
+    }
+
+    // TODO: find a proper way to predicate stat of substring
+    /** For now the statistics of the first argument (the source string) are returned unchanged. */
+    @Override
+    public ColumnStat visitSubstring(Substring substring, StatsDeriveResult context) {
+        Expression source = substring.child(0);
+        return source.accept(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
new file mode 100644
index 0000000000..6ba6fbe211
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -0,0 +1,300 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.stats;
+
+import org.apache.doris.nereids.stats.FilterEstimation.EstimationContext;
+import org.apache.doris.nereids.trees.expressions.And;
+import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
+import org.apache.doris.nereids.trees.expressions.CompoundPredicate;
+import org.apache.doris.nereids.trees.expressions.EqualTo;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.GreaterThan;
+import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
+import org.apache.doris.nereids.trees.expressions.InPredicate;
+import org.apache.doris.nereids.trees.expressions.LessThan;
+import org.apache.doris.nereids.trees.expressions.LessThanEqual;
+import org.apache.doris.nereids.trees.expressions.Not;
+import org.apache.doris.nereids.trees.expressions.Or;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.statistics.ColumnStat;
+import org.apache.doris.statistics.StatsDeriveResult;
+
+import com.google.common.base.Preconditions;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Calculate selectivity of expression that produces boolean value.
+ * TODO: Should consider the distribution of data.
+ */
+public class FilterEstimation extends ExpressionVisitor<StatsDeriveResult, EstimationContext> {
+
+ private static final double DEFAULT_SELECTIVITY = 0.1;
+
+ private static final double DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY = 1.0 / 3.0;
+
+ private static final double DEFAULT_EQUALITY_COMPARISON_SELECTIVITY = 0.1;
+
+ private final StatsDeriveResult stats;
+
+    /**
+     * Creates an estimator working on the given input statistics.
+     *
+     * @param stats statistics of the filter's input; must not be null
+     */
+    public FilterEstimation(StatsDeriveResult stats) {
+        this.stats = Preconditions.checkNotNull(stats);
+    }
+
+    /**
+     * Estimates the statistics that remain after applying {@code expression} as a filter.
+     * The row count is scaled by the estimated selectivity, and afterwards every
+     * column's ndv is capped at the resulting row count.
+     */
+    public StatsDeriveResult estimate(Expression expression) {
+        // For a comparison predicate, only a slot on the left side and a literal on the
+        // right side is considered a valid predicate.
+        final StatsDeriveResult filtered = calculate(expression);
+        final double rowLimit = filtered.getRowCount();
+        filtered.getSlotToColumnStats().values().forEach(columnStat -> {
+            if (columnStat.getNdv() > rowLimit) {
+                columnStat.setNdv(rowLimit);
+            }
+        });
+        return filtered;
+    }
+
+    /** Runs this visitor over {@code expression} without any estimation context. */
+    private StatsDeriveResult calculate(Expression expression) {
+        final EstimationContext noContext = null;
+        return expression.accept(this, noContext);
+    }
+
+    /**
+     * Fallback for expressions without a dedicated estimation: a copy of the
+     * input statistics scaled by the default selectivity.
+     */
+    @Override
+    public StatsDeriveResult visit(Expression expr, EstimationContext context) {
+        StatsDeriveResult copy = new StatsDeriveResult(stats);
+        return copy.updateRowCountBySelectivity(DEFAULT_SELECTIVITY);
+    }
+
+    /**
+     * Estimates AND / OR.
+     *
+     * <p>AND: the right side is estimated on top of the statistics produced by the
+     * left side. OR: rows(left) + rows(right) - rows(left AND right); for every
+     * column the value range is the union of the two sides' ranges, so both the
+     * minimum and the maximum are widened.
+     *
+     * @throws RuntimeException if the predicate is neither And nor Or
+     */
+    @Override
+    public StatsDeriveResult visitCompoundPredicate(CompoundPredicate predicate, EstimationContext context) {
+        Expression leftExpr = predicate.child(0);
+        Expression rightExpr = predicate.child(1);
+        StatsDeriveResult leftStats = leftExpr.accept(this, null);
+        if (predicate instanceof And) {
+            return rightExpr.accept(new FilterEstimation(leftStats), null);
+        }
+        if (predicate instanceof Or) {
+            StatsDeriveResult rightStats = rightExpr.accept(this, null);
+            StatsDeriveResult andStats = rightExpr.accept(new FilterEstimation(leftStats), null);
+            double rowCount = leftStats.getRowCount() + rightStats.getRowCount() - andStats.getRowCount();
+            StatsDeriveResult orStats = new StatsDeriveResult(stats).setRowCount(rowCount);
+            for (Map.Entry<Slot, ColumnStat> entry : leftStats.getSlotToColumnStats().entrySet()) {
+                Slot keySlot = entry.getKey();
+                ColumnStat leftColStats = entry.getValue();
+                ColumnStat rightColStats = rightStats.getColumnStatsBySlot(keySlot);
+                ColumnStat estimatedColStats = new ColumnStat(leftColStats);
+                // Without statistics for the right side, keep the copy of the left side as is.
+                if (rightColStats != null) {
+                    estimatedColStats.setMinValue(
+                            Math.min(leftColStats.getMinValue(), rightColStats.getMinValue()));
+                    estimatedColStats.setMaxValue(
+                            Math.max(leftColStats.getMaxValue(), rightColStats.getMaxValue()));
+                    estimatedColStats.setMaxSizeByte(
+                            Math.max(leftColStats.getMaxSizeByte(), rightColStats.getMaxSizeByte()));
+                }
+                orStats.addColumnStats(keySlot, estimatedColStats);
+            }
+            return orStats;
+        }
+        throw new RuntimeException(String.format("Unexpected predicate type: %s", predicate.toSql()));
+    }
+
+    /**
+     * Estimates a comparison by computing a selectivity from the statistics of both
+     * operands and applying it to a copy of the input statistics.
+     */
+    @Override
+    public StatsDeriveResult visitComparisonPredicate(ComparisonPredicate cp, EstimationContext context) {
+        Expression lhs = cp.left();
+        Expression rhs = cp.right();
+        ColumnStat lhsStat = ExpressionEstimation.estimate(lhs, stats);
+        ColumnStat rhsStat = ExpressionEstimation.estimate(rhs, stats);
+
+        boolean involvesLiteral = lhs instanceof Literal || rhs instanceof Literal;
+        // For a literal, max and min are the same value, so the max is used as the literal's value.
+        // NOTE(review): a literal on the left side is also routed here and then treated as the column - confirm.
+        double selectivity = involvesLiteral
+                ? calculateWhenRightChildIsLiteral(cp, lhsStat, rhsStat.getMaxValue())
+                : calculateWhenBothChildIsColumn(cp, lhsStat, rhsStat);
+        return new StatsDeriveResult(stats).updateRowCountOnCopy(selectivity);
+    }
+
+    /**
+     * Selectivity of {@code column <op> val}, assuming the column's values are
+     * spread evenly between its min and max.
+     *
+     * @param cp the comparison being estimated
+     * @param statsForLeft statistics of the column side
+     * @param val the literal's numeric value
+     * @return a selectivity in [0, 1]
+     * @throws RuntimeException if {@code cp} is not one of =, <, <=, >, >=
+     */
+    private double calculateWhenRightChildIsLiteral(ComparisonPredicate cp,
+            ColumnStat statsForLeft, double val) {
+        double max = statsForLeft.getMaxValue();
+        double min = statsForLeft.getMinValue();
+        // Share of the rows expected to carry one particular distinct value.
+        double pointSelectivity = singleValueSelectivity(statsForLeft.getNdv());
+        if (cp instanceof EqualTo) {
+            if (val > max || val < min) {
+                return 0.0;
+            }
+            return pointSelectivity;
+        }
+        if (cp instanceof LessThan) {
+            if (val <= min) {
+                return 0.0;
+            }
+            if (val > max) {
+                return 1.0;
+            }
+            if (val == max) {
+                return 1.0 - pointSelectivity;
+            }
+            // Here min < val < max, so the divisor is strictly positive.
+            return (val - min) / (max - min);
+        }
+        if (cp instanceof LessThanEqual) {
+            if (val < min) {
+                return 0.0;
+            }
+            if (val == min) {
+                return pointSelectivity;
+            }
+            if (val >= max) {
+                return 1.0;
+            }
+            return (val - min) / (max - min);
+        }
+        if (cp instanceof GreaterThan) {
+            if (val >= max) {
+                return 0.0;
+            }
+            if (val == min) {
+                return 1.0 - pointSelectivity;
+            }
+            if (val < min) {
+                return 1.0;
+            }
+            return (max - val) / (max - min);
+        }
+        if (cp instanceof GreaterThanEqual) {
+            if (val > max) {
+                return 0.0;
+            }
+            if (val == max) {
+                return pointSelectivity;
+            }
+            if (val <= min) {
+                return 1.0;
+            }
+            return (max - val) / (max - min);
+        }
+        throw new RuntimeException(String.format("Unexpected expression : %s", cp.toSql()));
+    }
+
+    /**
+     * Returns 1/ndv capped at 1. An ndv of zero gives 0 and a negative or NaN ndv
+     * gives the default equality selectivity, so the result is never infinite or negative.
+     */
+    private double singleValueSelectivity(double ndv) {
+        if (ndv > 0) {
+            return Math.min(1.0, 1.0 / ndv);
+        }
+        return ndv == 0 ? 0.0 : DEFAULT_EQUALITY_COMPARISON_SELECTIVITY;
+    }
+
+    /**
+     * Selectivity of a comparison between two non-literal expressions, decided from
+     * their value ranges: 0 when the ranges make the comparison impossible, 1 when
+     * they make it certain (inequalities only), otherwise a default selectivity.
+     *
+     * @throws RuntimeException if {@code cp} is not one of =, <, <=, >, >=
+     */
+    private double calculateWhenBothChildIsColumn(ComparisonPredicate cp,
+            ColumnStat statsForLeft, ColumnStat statsForRight) {
+        double lMin = statsForLeft.getMinValue();
+        double rMin = statsForRight.getMinValue();
+        double lMax = statsForLeft.getMaxValue();
+        double rMax = statsForRight.getMaxValue();
+        if (cp instanceof EqualTo) {
+            return statsForLeft.hasIntersect(statsForRight) ? DEFAULT_EQUALITY_COMPARISON_SELECTIVITY : 0.0;
+        }
+        boolean neverTrue;
+        boolean alwaysTrue;
+        if (cp instanceof GreaterThan) {
+            neverTrue = lMax <= rMin;
+            alwaysTrue = lMin >= rMax;
+        } else if (cp instanceof GreaterThanEqual) {
+            neverTrue = lMax < rMin;
+            alwaysTrue = lMin > rMax;
+        } else if (cp instanceof LessThan) {
+            neverTrue = lMin >= rMax;
+            alwaysTrue = lMax <= rMin;
+        } else if (cp instanceof LessThanEqual) {
+            neverTrue = lMin > rMax;
+            alwaysTrue = lMax < rMin;
+        } else {
+            throw new RuntimeException(String.format("Unexpected expression : %s", cp.toSql()));
+        }
+        // The "never" test takes precedence over the "always" test.
+        if (neverTrue) {
+            return 0.0;
+        }
+        if (alwaysTrue) {
+            return 1.0;
+        }
+        return DEFAULT_INEQUALITY_COMPARISON_SELECTIVITY;
+    }
+
+    /**
+     * Estimates {@code expr IN (options)} and, when invoked underneath a Not,
+     * {@code expr NOT IN (options)}.
+     *
+     * <p>The selectivity is 0 when the range of the options does not overlap the
+     * range of the compared expression; otherwise it is the ratio between the
+     * options' total ndv and the compared expression's ndv, capped at 1. For a
+     * plain IN over a slot, the slot's range in the result is narrowed to the
+     * overlap. The statistics held by the input are never modified: narrowing is
+     * done on a copy. If the statistics of the compared expression or of any
+     * option are invalid, the input statistics are returned unchanged.
+     */
+    @Override
+    public StatsDeriveResult visitInPredicate(InPredicate inPredicate, EstimationContext context) {
+        boolean isNotIn = context != null && context.isNot;
+        Expression compareExpr = inPredicate.getCompareExpr();
+        ColumnStat compareExprStats = ExpressionEstimation.estimate(compareExpr, stats);
+        if (ColumnStat.isInvalid(compareExprStats)) {
+            return stats;
+        }
+        List<Expression> options = inPredicate.getOptions();
+        // Start from an empty range so that lists holding only negative values are handled correctly.
+        double maxOption = Double.NEGATIVE_INFINITY;
+        double minOption = Double.POSITIVE_INFINITY;
+        double optionDistinctCount = 0;
+        for (Expression option : options) {
+            ColumnStat optionStats = ExpressionEstimation.estimate(option, stats);
+            if (ColumnStat.isInvalid(optionStats)) {
+                return stats;
+            }
+            optionDistinctCount += optionStats.getNdv();
+            maxOption = Math.max(optionStats.getMaxValue(), maxOption);
+            minOption = Math.min(optionStats.getMinValue(), minOption);
+        }
+        double expectedMax = Math.min(compareExprStats.getMaxValue(), maxOption);
+        double expectedMin = Math.max(compareExprStats.getMinValue(), minOption);
+        boolean hasOverlap = expectedMin <= expectedMax;
+        double cmpDistinctCount = compareExprStats.getNdv();
+        double selectivity;
+        if (!hasOverlap) {
+            selectivity = 0.0;
+        } else if (cmpDistinctCount > 0) {
+            selectivity = Math.min(1.0, optionDistinctCount / cmpDistinctCount);
+        } else {
+            // Without a usable ndv the ratio is meaningless; fall back to the default.
+            selectivity = DEFAULT_SELECTIVITY;
+        }
+        StatsDeriveResult estimated = new StatsDeriveResult(stats);
+        if (compareExpr instanceof SlotReference && !isNotIn && hasOverlap) {
+            // Narrow a copy so the column statistics of the input stay untouched.
+            ColumnStat narrowedStats = new ColumnStat(compareExprStats);
+            narrowedStats.setMaxValue(expectedMax);
+            narrowedStats.setMinValue(expectedMin);
+            estimated.addColumnStats((SlotReference) compareExpr, narrowedStats);
+        }
+        return estimated.updateRowCountOnCopy(isNotIn ? 1.0 - selectivity : selectivity);
+    }
+
+    /**
+     * Estimates the operand of a Not with a context that has the negation flag set.
+     * Directly nested Not expressions are rejected with an IllegalStateException.
+     */
+    @Override
+    public StatsDeriveResult visitNot(Not not, EstimationContext none) {
+        boolean nestedNot = not.child() instanceof Not;
+        Preconditions.checkState(!nestedNot,
+                "Consecutive Not statement should be merged previously");
+        EstimationContext negated = new EstimationContext();
+        negated.isNot = true;
+        return not.child().accept(this, negated);
+    }
+
+    /** Per-visit state handed down to the visit methods. */
+    static class EstimationContext {
+        // Set by visitNot for the expression it negates.
+        private boolean isNot = false;
+    }
+
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java
index 7eb6cab2f5..363c2e7316 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java
@@ -26,7 +26,7 @@ import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
-import org.apache.doris.statistics.ColumnStats;
+import org.apache.doris.statistics.ColumnStat;
import com.google.common.base.Preconditions;
@@ -39,9 +39,9 @@ public class FilterSelectivityCalculator extends ExpressionVisitor<Double, Void>
private static final double DEFAULT_EQUAL_SELECTIVITY = 0.3;
private static final double DEFAULT_RANGE_SELECTIVITY = 0.8;
- private final Map<Slot, ColumnStats> slotRefToStats;
+ private final Map<Slot, ColumnStat> slotRefToStats;
- public FilterSelectivityCalculator(Map<Slot, ColumnStats> slotRefToStats) {
+ public FilterSelectivityCalculator(Map<Slot, ColumnStat> slotRefToStats) {
Preconditions.checkNotNull(slotRefToStats);
this.slotRefToStats = slotRefToStats;
}
@@ -92,11 +92,11 @@ public class FilterSelectivityCalculator extends ExpressionVisitor<Double, Void>
@Override
public Double visitEqualTo(EqualTo equalTo, Void context) {
SlotReference left = (SlotReference) equalTo.left();
- ColumnStats columnStats = slotRefToStats.get(left);
+ ColumnStat columnStats = slotRefToStats.get(left);
if (columnStats == null) {
return DEFAULT_EQUAL_SELECTIVITY;
}
- long ndv = columnStats.getNdv();
+ double ndv = columnStats.getNdv();
return ndv < 0 ? DEFAULT_EQUAL_SELECTIVITY : ndv == 0 ? 0 : 1.0 / columnStats.getNdv();
}
// TODO: Should consider the distribution of data.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index 7a5f4476db..8a17ab29b2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -30,7 +30,7 @@ import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.algebra.Join;
import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin;
import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.statistics.ColumnStats;
+import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.StatsDeriveResult;
import com.google.common.base.Preconditions;
@@ -66,7 +66,7 @@ public class JoinEstimation {
private static class JoinEstimationResult {
public boolean forbiddenReducePropagation = false;
public boolean isReducedByHashJoin = false;
- public long rowCount = 0;
+ public double rowCount = 0;
}
/**
@@ -137,7 +137,7 @@ public class JoinEstimation {
// .collect(Collectors.toList());
boolean isReducedByHashJoin = false;
boolean forbiddenReducePropagation = false;
- long rowCount;
+ double rowCount;
if (joinType == JoinType.LEFT_SEMI_JOIN || joinType == JoinType.LEFT_ANTI_JOIN) {
if (rightStats.isReduced && rightStats.width <= LIMIT_RIGHT_WIDTH) {
rowCount = leftStats.getRowCount() / REDUCE_TIMES;
@@ -179,7 +179,7 @@ public class JoinEstimation {
forbiddenReducePropagation = better.forbiddenReducePropagation;
isReducedByHashJoin = better.isReducedByHashJoin;
} else {
- long childRowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount());
+ double childRowCount = Math.max(leftStats.getRowCount(), rightStats.getRowCount());
rowCount = childRowCount;
}
} else if (joinType == JoinType.LEFT_OUTER_JOIN) {
@@ -228,7 +228,7 @@ public class JoinEstimation {
// we should handle that properly.
private static long getSemiJoinRowCount(StatsDeriveResult leftStats, StatsDeriveResult rightStats,
List<Expression> hashConjuncts, JoinType joinType) {
- long rowCount;
+ double rowCount;
if (JoinType.RIGHT_SEMI_JOIN.equals(joinType) || JoinType.RIGHT_ANTI_JOIN.equals(joinType)) {
if (rightStats.getRowCount() == -1) {
return -1;
@@ -240,14 +240,14 @@ public class JoinEstimation {
}
rowCount = leftStats.getRowCount();
}
- Map<Slot, ColumnStats> leftSlotToColStats = leftStats.getSlotToColumnStats();
- Map<Slot, ColumnStats> rightSlotToColStats = rightStats.getSlotToColumnStats();
+ Map<Slot, ColumnStat> leftSlotToColStats = leftStats.getSlotToColumnStats();
+ Map<Slot, ColumnStat> rightSlotToColStats = rightStats.getSlotToColumnStats();
double minSelectivity = 1.0;
for (Expression hashConjunct : hashConjuncts) {
// TODO: since we have no column stats here. just use a fix ratio to compute the row count.
- long lhsNdv = leftSlotToColStats.get(removeCast(hashConjunct.child(0))).getNdv();
+ double lhsNdv = leftSlotToColStats.get(removeCast(hashConjunct.child(0))).getNdv();
lhsNdv = Math.min(lhsNdv, leftStats.getRowCount());
- long rhsNdv = rightSlotToColStats.get(removeCast(hashConjunct.child(1))).getNdv();
+ double rhsNdv = rightSlotToColStats.get(removeCast(hashConjunct.child(1))).getNdv();
rhsNdv = Math.min(rhsNdv, rightStats.getRowCount());
// Skip conjuncts with unknown NDV on either side.
if (lhsNdv == -1 || rhsNdv == -1) {
@@ -280,17 +280,17 @@ public class JoinEstimation {
return Math.round(rowCount * minSelectivity);
}
- private static long getJoinRowCount(StatsDeriveResult leftStats, StatsDeriveResult rightStats,
+ private static double getJoinRowCount(StatsDeriveResult leftStats, StatsDeriveResult rightStats,
List<Expression> eqConjunctList, JoinType joinType) {
- long lhsCard = leftStats.getRowCount();
- long rhsCard = rightStats.getRowCount();
- Map<Slot, ColumnStats> leftSlotToColumnStats = leftStats.getSlotToColumnStats();
- Map<Slot, ColumnStats> rightSlotToColumnStats = rightStats.getSlotToColumnStats();
+ double lhsCard = leftStats.getRowCount();
+ double rhsCard = rightStats.getRowCount();
+ Map<Slot, ColumnStat> leftSlotToColumnStats = leftStats.getSlotToColumnStats();
+ Map<Slot, ColumnStat> rightSlotToColumnStats = rightStats.getSlotToColumnStats();
if (lhsCard == -1 || rhsCard == -1) {
return lhsCard;
}
- long result = -1;
+ double result = -1;
for (Expression eqJoinConjunct : eqConjunctList) {
Expression left = eqJoinConjunct.child(0);
if (!(left instanceof SlotReference)) {
@@ -301,19 +301,19 @@ public class JoinEstimation {
continue;
}
SlotReference leftSlot = (SlotReference) left;
- ColumnStats leftColStats = leftSlotToColumnStats.get(leftSlot);
+ ColumnStat leftColStats = leftSlotToColumnStats.get(leftSlot);
if (leftColStats == null) {
continue;
}
SlotReference rightSlot = (SlotReference) right;
- ColumnStats rightColStats = rightSlotToColumnStats.get(rightSlot);
+ ColumnStat rightColStats = rightSlotToColumnStats.get(rightSlot);
if (rightColStats == null) {
continue;
}
double leftSideNdv = leftColStats.getNdv();
double rightSideNdv = rightColStats.getNdv();
long tmpNdv = (long) Math.max(1, Math.max(leftSideNdv, rightSideNdv));
- long joinCard = tmpNdv == rhsCard ? lhsCard : CheckedMath.checkedMultiply(
+ double joinCard = tmpNdv == rhsCard ? lhsCard : CheckedMath.checkedMultiply(
Math.round((lhsCard / Math.max(1, Math.max(leftSideNdv, rightSideNdv)))), rhsCard);
if (result == -1) {
result = joinCard;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index e82edc957a..faee8258f4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -17,7 +17,6 @@
package org.apache.doris.nereids.stats;
-import org.apache.doris.analysis.NullLiteral;
import org.apache.doris.catalog.MaterializedIndex;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
@@ -64,7 +63,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN;
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.statistics.ColumnStats;
+import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.TableStats;
@@ -95,6 +94,10 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
* estimate stats
*/
public static void estimate(GroupExpression groupExpression) {
+ if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableNereidsStatsDeriveV2) {
+ StatsCalculatorV2.estimate(groupExpression);
+ return;
+ }
StatsCalculator statsCalculator = new StatsCalculator(groupExpression);
statsCalculator.estimate();
}
@@ -264,7 +267,7 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
// TODO: tmp mock the table stats, after we support the table stats, we should remove this mock.
mockRowCountInStatistic(scan)
);
- Map<Slot, ColumnStats> slotToColumnStats = new HashMap<>();
+ Map<Slot, ColumnStat> slotToColumnStats = new HashMap<>();
Set<SlotReference> slotSet = scan.getOutput().stream().filter(SlotReference.class::isInstance)
.map(s -> (SlotReference) s).collect(Collectors.toSet());
for (SlotReference slotReference : slotSet) {
@@ -272,10 +275,10 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
if (colName == null) {
throw new RuntimeException("Column name of SlotReference shouldn't be null here");
}
- ColumnStats columnStats = tableStats.getColumnStatsOrDefault(colName);
+ ColumnStat columnStats = tableStats.getColumnStatsOrDefault(colName);
slotToColumnStats.put(slotReference, columnStats);
}
- long rowCount = tableStats.getRowCount();
+ long rowCount = (long) tableStats.getRowCount();
StatsDeriveResult stats = new StatsDeriveResult(rowCount,
new HashMap<>(), new HashMap<>());
stats.setSlotToColumnStats(slotToColumnStats);
@@ -320,17 +323,17 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
// .map(childSlotToColumnStats::get)
// .map(ColumnStats::getNdv)
// .reduce(1L, (a, b) -> a * b);
- long resultSetCount = childStats.getRowCount() / DEFAULT_AGGREGATE_RATIO;
+ long resultSetCount = (long) childStats.getRowCount() / DEFAULT_AGGREGATE_RATIO;
if (resultSetCount <= 0) {
resultSetCount = 1L;
}
- Map<Slot, ColumnStats> slotToColumnStats = Maps.newHashMap();
+ Map<Slot, ColumnStat> slotToColumnStats = Maps.newHashMap();
List<NamedExpression> outputExpressions = aggregate.getOutputExpressions();
// TODO: 1. Estimate the output unit size by the type of corresponding AggregateFunction
// 2. Handle alias, literal in the output expression list
for (NamedExpression outputExpression : outputExpressions) {
- slotToColumnStats.put(outputExpression.toSlot(), new ColumnStats());
+ slotToColumnStats.put(outputExpression.toSlot(), new ColumnStat());
}
StatsDeriveResult statsDeriveResult = new StatsDeriveResult(resultSetCount, slotToColumnStats);
statsDeriveResult.isReduced = true;
@@ -342,13 +345,12 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
private StatsDeriveResult computeProject(Project project) {
List<NamedExpression> projections = project.getProjects();
StatsDeriveResult statsDeriveResult = groupExpression.getCopyOfChildStats(0);
- Map<Slot, ColumnStats> childColumnStats = statsDeriveResult.getSlotToColumnStats();
- Map<Slot, ColumnStats> columnsStats = projections.stream().map(projection -> {
- ColumnStats value = null;
+ Map<Slot, ColumnStat> childColumnStats = statsDeriveResult.getSlotToColumnStats();
+ Map<Slot, ColumnStat> columnsStats = projections.stream().map(projection -> {
+ ColumnStat value = null;
Set<Slot> slots = projection.getInputSlots();
if (slots.isEmpty()) {
- value = new ColumnStats(1, 1, 1, 0,
- new NullLiteral(), new NullLiteral());
+ value = ColumnStat.createDefaultColumnStats();
} else {
// TODO: just a trick here, need to do real project on column stats
for (Slot slot : slots) {
@@ -358,8 +360,7 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
}
}
if (value == null) {
- value = new ColumnStats(1, 1, 1, 0,
- new NullLiteral(), new NullLiteral());
+ value = ColumnStat.createDefaultColumnStats();
}
}
return new SimpleEntry<>(projection.toSlot(), value);
@@ -369,11 +370,11 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
}
private StatsDeriveResult computeOneRowRelation(OneRowRelation oneRowRelation) {
- Map<Slot, ColumnStats> columnStatsMap = oneRowRelation.getProjects()
+ Map<Slot, ColumnStat> columnStatsMap = oneRowRelation.getProjects()
.stream()
.map(project -> {
- ColumnStats columnStats = new ColumnStats(1, -1, -1, -1,
- new NullLiteral(), new NullLiteral());
+ ColumnStat columnStats = new ColumnStat();
+ columnStats.setNdv(1);
// TODO: compute the literal size
return Pair.of(project.toSlot(), columnStats);
})
@@ -383,11 +384,14 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
}
private StatsDeriveResult computeEmptyRelation(EmptyRelation emptyRelation) {
- Map<Slot, ColumnStats> columnStatsMap = emptyRelation.getProjects()
+ Map<Slot, ColumnStat> columnStatsMap = emptyRelation.getProjects()
.stream()
.map(project -> {
- ColumnStats columnStats = new ColumnStats(0, 0, 0, 0,
- new NullLiteral(), new NullLiteral());
+ ColumnStat columnStats = new ColumnStat();
+ columnStats.setNdv(0);
+ columnStats.setMaxSizeByte(0);
+ columnStats.setNumNulls(0);
+ columnStats.setAvgSizeByte(0);
return Pair.of(project.toSlot(), columnStats);
})
.collect(Collectors.toMap(Pair::key, Pair::value));
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculatorV2.java
similarity index 84%
copy from fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
copy to fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculatorV2.java
index e82edc957a..05f34e24a4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculatorV2.java
@@ -17,13 +17,13 @@
package org.apache.doris.nereids.stats;
-import org.apache.doris.analysis.NullLiteral;
import org.apache.doris.catalog.MaterializedIndex;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.memo.GroupExpression;
+import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
@@ -64,7 +64,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN;
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.statistics.ColumnStats;
+import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatsDeriveResult;
import org.apache.doris.statistics.TableStats;
@@ -81,13 +81,11 @@ import java.util.stream.Collectors;
/**
* Used to calculate the stats for each plan
*/
-public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void> {
-
- private static final int DEFAULT_AGGREGATE_RATIO = 1000;
+public class StatsCalculatorV2 extends DefaultPlanVisitor<StatsDeriveResult, Void> {
private final GroupExpression groupExpression;
- private StatsCalculator(GroupExpression groupExpression) {
+ private StatsCalculatorV2(GroupExpression groupExpression) {
this.groupExpression = groupExpression;
}
@@ -95,8 +93,8 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
* estimate stats
*/
public static void estimate(GroupExpression groupExpression) {
- StatsCalculator statsCalculator = new StatsCalculator(groupExpression);
- statsCalculator.estimate();
+ StatsCalculatorV2 statsCalculatorV2 = new StatsCalculatorV2(groupExpression);
+ statsCalculatorV2.estimate();
}
private void estimate() {
@@ -248,12 +246,9 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
private StatsDeriveResult computeFilter(Filter filter) {
StatsDeriveResult stats = groupExpression.getCopyOfChildStats(0);
- FilterSelectivityCalculator selectivityCalculator =
- new FilterSelectivityCalculator(stats.getSlotToColumnStats());
- double selectivity = selectivityCalculator.estimate(filter.getPredicates());
- stats.updateRowCountBySelectivity(selectivity);
- stats.isReduced = selectivity < 1.0;
- return stats;
+ FilterEstimation selectivityCalculator =
+ new FilterEstimation(stats);
+ return selectivityCalculator.estimate(filter.getPredicates());
}
// TODO: 1. Subtract the pruned partition
@@ -264,7 +259,7 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
// TODO: tmp mock the table stats, after we support the table stats, we should remove this mock.
mockRowCountInStatistic(scan)
);
- Map<Slot, ColumnStats> slotToColumnStats = new HashMap<>();
+ Map<Slot, ColumnStat> slotToColumnStats = new HashMap<>();
Set<SlotReference> slotSet = scan.getOutput().stream().filter(SlotReference.class::isInstance)
.map(s -> (SlotReference) s).collect(Collectors.toSet());
for (SlotReference slotReference : slotSet) {
@@ -272,10 +267,10 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
if (colName == null) {
throw new RuntimeException("Column name of SlotReference shouldn't be null here");
}
- ColumnStats columnStats = tableStats.getColumnStatsOrDefault(colName);
- slotToColumnStats.put(slotReference, columnStats);
+ ColumnStat columnStat = tableStats.getColumnStatCopy(colName);
+ slotToColumnStats.put(slotReference, columnStat);
}
- long rowCount = tableStats.getRowCount();
+ double rowCount = tableStats.getRowCount();
StatsDeriveResult stats = new StatsDeriveResult(rowCount,
new HashMap<>(), new HashMap<>());
stats.setSlotToColumnStats(slotToColumnStats);
@@ -311,29 +306,24 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
private StatsDeriveResult computeAggregate(Aggregate aggregate) {
// TODO: since we have no column stats here. just use a fix ratio to compute the row count.
- // List<Expression> groupByExpressions = aggregate.getGroupByExpressions();
+ List<Expression> groupByExpressions = aggregate.getGroupByExpressions();
StatsDeriveResult childStats = groupExpression.getCopyOfChildStats(0);
- // Map<Slot, ColumnStats> childSlotToColumnStats = childStats.getSlotToColumnStats();
- // long resultSetCount = groupByExpressions.stream()
- // .flatMap(expr -> expr.getInputSlots().stream())
- // .filter(childSlotToColumnStats::containsKey)
- // .map(childSlotToColumnStats::get)
- // .map(ColumnStats::getNdv)
- // .reduce(1L, (a, b) -> a * b);
- long resultSetCount = childStats.getRowCount() / DEFAULT_AGGREGATE_RATIO;
+ Map<Slot, ColumnStat> childSlotToColumnStats = childStats.getSlotToColumnStats();
+ double resultSetCount = groupByExpressions.stream().flatMap(expr -> expr.getInputSlots().stream())
+ .filter(childSlotToColumnStats::containsKey).map(childSlotToColumnStats::get).map(ColumnStat::getNdv)
+ .reduce(1d, (a, b) -> a * b);
if (resultSetCount <= 0) {
resultSetCount = 1L;
}
- Map<Slot, ColumnStats> slotToColumnStats = Maps.newHashMap();
+ Map<Slot, ColumnStat> slotToColumnStats = Maps.newHashMap();
List<NamedExpression> outputExpressions = aggregate.getOutputExpressions();
// TODO: 1. Estimate the output unit size by the type of corresponding AggregateFunction
// 2. Handle alias, literal in the output expression list
for (NamedExpression outputExpression : outputExpressions) {
- slotToColumnStats.put(outputExpression.toSlot(), new ColumnStats());
+ slotToColumnStats.put(outputExpression.toSlot(), new ColumnStat());
}
StatsDeriveResult statsDeriveResult = new StatsDeriveResult(resultSetCount, slotToColumnStats);
- statsDeriveResult.isReduced = true;
// TODO: Update ColumnStats properly, add new mapping from output slot to ColumnStats
return statsDeriveResult;
}
@@ -342,13 +332,12 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
private StatsDeriveResult computeProject(Project project) {
List<NamedExpression> projections = project.getProjects();
StatsDeriveResult statsDeriveResult = groupExpression.getCopyOfChildStats(0);
- Map<Slot, ColumnStats> childColumnStats = statsDeriveResult.getSlotToColumnStats();
- Map<Slot, ColumnStats> columnsStats = projections.stream().map(projection -> {
- ColumnStats value = null;
+ Map<Slot, ColumnStat> childColumnStats = statsDeriveResult.getSlotToColumnStats();
+ Map<Slot, ColumnStat> columnsStats = projections.stream().map(projection -> {
+ ColumnStat value = null;
Set<Slot> slots = projection.getInputSlots();
if (slots.isEmpty()) {
- value = new ColumnStats(1, 1, 1, 0,
- new NullLiteral(), new NullLiteral());
+ value = ColumnStat.createDefaultColumnStats();
} else {
// TODO: just a trick here, need to do real project on column stats
for (Slot slot : slots) {
@@ -358,8 +347,7 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
}
}
if (value == null) {
- value = new ColumnStats(1, 1, 1, 0,
- new NullLiteral(), new NullLiteral());
+ value = ColumnStat.createDefaultColumnStats();
}
}
return new SimpleEntry<>(projection.toSlot(), value);
@@ -369,13 +357,13 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
}
private StatsDeriveResult computeOneRowRelation(OneRowRelation oneRowRelation) {
- Map<Slot, ColumnStats> columnStatsMap = oneRowRelation.getProjects()
+ Map<Slot, ColumnStat> columnStatsMap = oneRowRelation.getProjects()
.stream()
.map(project -> {
- ColumnStats columnStats = new ColumnStats(1, -1, -1, -1,
- new NullLiteral(), new NullLiteral());
+ ColumnStat columnStat = new ColumnStat();
+ columnStat.setNdv(1);
// TODO: compute the literal size
- return Pair.of(project.toSlot(), columnStats);
+ return Pair.of(project.toSlot(), columnStat);
})
.collect(Collectors.toMap(Pair::key, Pair::value));
int rowCount = 1;
@@ -383,12 +371,15 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
}
private StatsDeriveResult computeEmptyRelation(EmptyRelation emptyRelation) {
- Map<Slot, ColumnStats> columnStatsMap = emptyRelation.getProjects()
+ Map<Slot, ColumnStat> columnStatsMap = emptyRelation.getProjects()
.stream()
.map(project -> {
- ColumnStats columnStats = new ColumnStats(0, 0, 0, 0,
- new NullLiteral(), new NullLiteral());
- return Pair.of(project.toSlot(), columnStats);
+ ColumnStat columnStat = new ColumnStat();
+ columnStat.setNdv(0);
+ columnStat.setMaxSizeByte(0);
+ columnStat.setNumNulls(0);
+ columnStat.setAvgSizeByte(0);
+ return Pair.of(project.toSlot(), columnStat);
})
.collect(Collectors.toMap(Pair::key, Pair::value));
int rowCount = 0;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java
index e911c739f0..7210f25ec1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java
@@ -171,4 +171,5 @@ public abstract class Expression extends AbstractTreeNode<Expression> implements
public boolean hasUnbound() {
return this.anyMatch(Unbound.class::isInstance);
}
+
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Substring.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Substring.java
index ea751658c5..a3e69a9a06 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Substring.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Substring.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.typecoercion.ImplicitCastInputTypes;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.types.VarcharType;
@@ -88,4 +89,9 @@ public class Substring extends ScalarFunction implements ImplicitCastInputTypes,
public List<AbstractDataType> expectedInputTypes() {
return EXPECTED_INPUT_TYPES;
}
+
+ @Override // Visitor dispatch: hands this Substring to ExpressionVisitor.visitSubstring.
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitSubstring(this, context); // returns whatever the visitor returns
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/WeekOfYear.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/WeekOfYear.java
index a39a7cd923..77cf3b7602 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/WeekOfYear.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/WeekOfYear.java
@@ -21,6 +21,7 @@ import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.typecoercion.ImplicitCastInputTypes;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.types.DateTimeType;
import org.apache.doris.nereids.types.IntegerType;
@@ -67,4 +68,8 @@ public class WeekOfYear extends ScalarFunction implements UnaryExpression, Impli
return EXPECTED_INPUT_TYPES;
}
+ @Override // Visitor dispatch: hands this WeekOfYear to ExpressionVisitor.visitWeekOfYear.
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitWeekOfYear(this, context); // returns whatever the visitor returns
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Year.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Year.java
index 5db71a4d6e..4f593dfc09 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Year.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Year.java
@@ -21,6 +21,7 @@ import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.typecoercion.ImplicitCastInputTypes;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.types.DateTimeType;
import org.apache.doris.nereids.types.DateType;
@@ -68,4 +69,9 @@ public class Year extends ScalarFunction implements UnaryExpression, ImplicitCas
public List<AbstractDataType> expectedInputTypes() {
return EXPECTED_INPUT_TYPES;
}
+
+ @Override // Visitor dispatch: hands this Year to ExpressionVisitor.visitYear.
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitYear(this, context); // returns whatever the visitor returns
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/Literal.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/Literal.java
index 97f10e7335..f1449427d1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/Literal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/Literal.java
@@ -217,4 +217,7 @@ public abstract class Literal extends Expression implements LeafExpression {
public abstract LiteralExpr toLegacyLiteral();
+ public boolean isStringLiteral() { // true when this literal's dataType reports isStringType()
+ return dataType.isStringType();
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ExpressionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ExpressionVisitor.java
index ca7982515f..7c0b7bd322 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ExpressionVisitor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ExpressionVisitor.java
@@ -67,6 +67,9 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
import org.apache.doris.nereids.trees.expressions.functions.agg.Max;
import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.WeekOfYear;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
import org.apache.doris.nereids.trees.expressions.literal.CharLiteral;
@@ -358,4 +361,16 @@ public abstract class ExpressionVisitor<R, C> {
public R visitUnboundStar(UnboundStar unboundStar, C context) {
return visitNamedExpression(unboundStar, context);
}
+
+ public R visitYear(Year year, C context) { // default: delegates to visitBoundFunction; override for Year-specific handling
+ return visitBoundFunction(year, context);
+ }
+
+ public R visitWeekOfYear(WeekOfYear weekOfYear, C context) { // default: delegates to visitBoundFunction; override for WeekOfYear-specific handling
+ return visitBoundFunction(weekOfYear, context);
+ }
+
+ public R visitSubstring(Substring substring, C context) { // default: delegates to visitBoundFunction; override for Substring-specific handling
+ return visitBoundFunction(substring, context);
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
index 2adcbe9ab4..cf8cf9fe2c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/Utils.java
@@ -26,6 +26,9 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import org.apache.commons.lang3.StringUtils;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
@@ -194,4 +197,8 @@ public class Utils {
return conjuncts.stream().collect(Collectors.partitioningBy(
expr -> expr.anyMatch(slots::contains)));
}
+
+ public static LocalDateTime getLocalDatetimeFromLong(long dateTime) { // dateTime is read as seconds since the Unix epoch
+ return LocalDateTime.ofInstant(Instant.ofEpochSecond(dateTime), ZoneId.systemDefault()); // converted using the JVM's default time zone
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/AggregationNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/AggregationNode.java
index a1c6b19d1c..7922e60fdd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/AggregationNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/AggregationNode.java
@@ -190,7 +190,7 @@ public class AggregationNode extends PlanNode {
}
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/AnalyticEvalNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/AnalyticEvalNode.java
index 34462a5fd8..00471d7d3a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/AnalyticEvalNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/AnalyticEvalNode.java
@@ -143,7 +143,7 @@ public class AnalyticEvalNode extends PlanNode {
return;
}
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/AssertNumRowsNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/AssertNumRowsNode.java
index 251cc48225..da29e93497 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/AssertNumRowsNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/AssertNumRowsNode.java
@@ -64,7 +64,7 @@ public class AssertNumRowsNode extends PlanNode {
super.computeStats(analyzer);
if (analyzer.safeIsEnableJoinReorderBasedCost()) {
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
}
if (LOG.isDebugEnabled()) {
LOG.debug("stats AssertNumRows: cardinality={}", cardinality);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/CrossJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/CrossJoinNode.java
index 2e06155698..8d4c13ba89 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/CrossJoinNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/CrossJoinNode.java
@@ -94,7 +94,7 @@ public class CrossJoinNode extends PlanNode {
return;
}
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/EmptySetNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/EmptySetNode.java
index 1b088d917f..e99d32a970 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/EmptySetNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/EmptySetNode.java
@@ -48,7 +48,7 @@ public class EmptySetNode extends PlanNode {
@Override
public void computeStats(Analyzer analyzer) throws UserException {
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
avgRowSize = 0;
numNodes = 1;
if (LOG.isDebugEnabled()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ExchangeNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ExchangeNode.java
index 5cae77c483..fd4da60790 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/ExchangeNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ExchangeNode.java
@@ -123,7 +123,7 @@ public class ExchangeNode extends PlanNode {
protected void computeStats(Analyzer analyzer) throws UserException {
Preconditions.checkState(children.size() == 1);
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Exchange:" + id + ", cardinality: " + cardinality);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
index 5cd00ba510..ce4a2f4648 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
@@ -838,7 +838,7 @@ public class HashJoinNode extends PlanNode {
}
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats HashJoin:" + id + ", cardinality: " + cardinality);
@@ -961,7 +961,7 @@ public class HashJoinNode extends PlanNode {
Preconditions.checkState(joinOp.isSemiJoin());
// Return -1 if the cardinality of the returned side is unknown.
- long cardinality;
+ double cardinality;
if (joinOp == JoinOperator.RIGHT_SEMI_JOIN || joinOp == JoinOperator.RIGHT_ANTI_JOIN) {
if (getChild(1).cardinality == -1) {
return -1;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/JdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/JdbcScanNode.java
index 09c2d56f4e..2651182ec1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/JdbcScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/JdbcScanNode.java
@@ -170,7 +170,7 @@ public class JdbcScanNode extends ScanNode {
numNodes = numNodes <= 0 ? 1 : numNodes;
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java
index ba9a247658..a04b531159 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java
@@ -171,6 +171,6 @@ public class MysqlScanNode extends ScanNode {
numNodes = numNodes <= 0 ? 1 : numNodes;
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java
index bcc567d0e5..e3876c803b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java
@@ -223,6 +223,6 @@ public class OdbcScanNode extends ScanNode {
numNodes = numNodes <= 0 ? 1 : numNodes;
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index e813bf03e2..c9d2858727 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -534,7 +534,7 @@ public class OlapScanNode extends ScanNode {
private void computeInaccurateCardinality() throws UserException {
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
}
private Collection<Long> partitionPrune(PartitionInfo partitionInfo,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
index 723cee4f31..afb1fd1655 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
@@ -872,7 +872,7 @@ public abstract class PlanNode extends TreeNode<PlanNode> implements PlanStats {
private void applySelectivity() {
double selectivity = computeSelectivity();
Preconditions.checkState(cardinality >= 0);
- long preConjunctCardinality = cardinality;
+ double preConjunctCardinality = cardinality;
cardinality = Math.round(cardinality * selectivity);
// don't round cardinality down to zero for safety.
if (cardinality == 0 && preConjunctCardinality > 0) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
index f9f136f208..d18c6b33ad 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RepeatNode.java
@@ -102,7 +102,7 @@ public class RepeatNode extends PlanNode {
numNodes = 1;
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Sort: cardinality=" + cardinality);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SelectNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SelectNode.java
index 92736c2e51..d14de30d3c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SelectNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SelectNode.java
@@ -73,7 +73,7 @@ public class SelectNode extends PlanNode {
return;
}
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Select: cardinality={}", this.cardinality);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
index 1012f3bb3f..d5ab71573f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
@@ -178,7 +178,7 @@ public class SortNode extends PlanNode {
}
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
if (LOG.isDebugEnabled()) {
LOG.debug("stats Sort: cardinality=" + cardinality);
@@ -195,7 +195,7 @@ public class SortNode extends PlanNode {
cardinality = Math.min(cardinality, limit);
}
}
- LOG.debug("stats Sort: cardinality=" + Long.toString(cardinality));
+ LOG.debug("stats Sort: cardinality=" + Double.toString(cardinality));
}
public void init(Analyzer analyzer) throws UserException {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/TableFunctionNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/TableFunctionNode.java
index 9b0373f4cf..b6eac825dd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/TableFunctionNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/TableFunctionNode.java
@@ -149,7 +149,7 @@ public class TableFunctionNode extends PlanNode {
super.computeStats(analyzer);
StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this);
- cardinality = statsDeriveResult.getRowCount();
+ cardinality = (long) statsDeriveResult.getRowCount();
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index e3f966c904..ee1a5e7627 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -227,6 +227,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String ENABLE_CBO_STATISTICS = "enable_cbo_statistics";
+ public static final String ENABLE_NEREIDS_STATS_DERIVE_V2 = "enable_nereids_stats_derive_v2";
+
// session origin value
public Map<Field, String> sessionOriginValue = new HashMap<Field, String>();
// check stmt is or not [select /*+ SET_VAR(...)*/ ...]
@@ -586,6 +588,9 @@ public class SessionVariable implements Serializable, Writable {
@VariableMgr.VarAttr(name = ENABLE_CBO_STATISTICS)
public boolean enableCboStatistics = false;
+ @VariableMgr.VarAttr(name = ENABLE_NEREIDS_STATS_DERIVE_V2)
+ public boolean enableNereidsStatsDeriveV2 = false;
+
public String getBlockEncryptionMode() {
return blockEncryptionMode;
}
@@ -1146,6 +1151,14 @@ public class SessionVariable implements Serializable, Writable {
this.enableSingleReplicaInsert = enableSingleReplicaInsert;
}
+ /** Returns the current value of the {@code enable_nereids_stats_derive_v2} session variable. */
+ public boolean isEnableNereidsStatsDeriveV2() {
+ return enableNereidsStatsDeriveV2;
+ }
+
+ /** Sets the value of the {@code enable_nereids_stats_derive_v2} session variable. */
+ public void setEnableNereidsStatsDeriveV2(boolean enableNereidsStatsDeriveV2) {
+ this.enableNereidsStatsDeriveV2 = enableNereidsStatsDeriveV2;
+ }
+
/**
* Serialize to thrift object.
* Used for rest api.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalyticEvalStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalyticEvalStatsDerive.java
index 5c6208d887..b5bf250535 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalyticEvalStatsDerive.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalyticEvalStatsDerive.java
@@ -35,7 +35,7 @@ public class AnalyticEvalStatsDerive extends BaseStatsDerive {
@Override
protected long deriveRowCount() {
Preconditions.checkState(!childrenStatsResult.isEmpty());
- rowCount = rowCount == -1 ? childrenStatsResult.get(0).getRowCount() : rowCount;
+ rowCount = (long) (rowCount == -1 ? childrenStatsResult.get(0).getRowCount() : rowCount);
applyConjunctsSelectivity();
capRowCountAtLimit();
if (LOG.isDebugEnabled()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java
index 85d3859b7a..c43666d356 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java
@@ -75,7 +75,7 @@ public class BaseStatsDerive {
private void applySelectivity() {
double selectivity = computeSelectivity();
Preconditions.checkState(rowCount >= 0);
- long preConjunctrowCount = rowCount;
+ double preConjunctrowCount = rowCount;
rowCount = Math.round(rowCount * selectivity);
// don't round rowCount down to zero for safety.
if (rowCount == 0 && preConjunctrowCount > 0) {
@@ -144,7 +144,7 @@ public class BaseStatsDerive {
// Currently it simply adds the number of rows of children
protected long deriveRowCount() {
for (StatsDeriveResult statsDeriveResult : childrenStatsResult) {
- rowCount = Math.max(rowCount, statsDeriveResult.getRowCount());
+ rowCount = (long) Math.max(rowCount, statsDeriveResult.getRowCount());
}
applyConjunctsSelectivity();
capRowCountAtLimit();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStat.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStat.java
new file mode 100644
index 0000000000..a8ed00b031
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStat.java
@@ -0,0 +1,426 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.analysis.BoolLiteral;
+import org.apache.doris.analysis.DateLiteral;
+import org.apache.doris.analysis.DecimalLiteral;
+import org.apache.doris.analysis.FloatLiteral;
+import org.apache.doris.analysis.IntLiteral;
+import org.apache.doris.analysis.LargeIntLiteral;
+import org.apache.doris.analysis.LiteralExpr;
+import org.apache.doris.analysis.StringLiteral;
+import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.ScalarType;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.util.Util;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Predicate;
+
+/**
+ * The statistics of a single column.
+ * The column stats are mainly used to provide input for the Optimizer's cost model.
+ * <p>
+ * The column stats are described as follows:
+ * 1. @ndv: The number of distinct values of the column.
+ * 2. @avgSize: The average size of the column. The unit is bytes.
+ * 3. @maxSize: The max size of the column. The unit is bytes.
+ * 4. @numNulls: The number of nulls.
+ * 5. @minValue: The min value of the column.
+ * 6. @maxValue: The max value of the column.
+ * <p>
+ * The granularity of the statistics is the whole table.
+ * For example:
+ * "@ndv = 10" means that the number of distinct values is 10 in the whole table.
+ */
+public class ColumnStat {
+
+ // Aliases of the StatsType constants, used as keys in updateStats() and in its error messages.
+ public static final StatsType NDV = StatsType.NDV;
+ public static final StatsType AVG_SIZE = StatsType.AVG_SIZE;
+ public static final StatsType MAX_SIZE = StatsType.MAX_SIZE;
+ public static final StatsType NUM_NULLS = StatsType.NUM_NULLS;
+ public static final StatsType MIN_VALUE = StatsType.MIN_VALUE;
+ public static final StatsType MAX_VALUE = StatsType.MAX_VALUE;
+
+ // Shared marker instance; isInvalid() recognizes it by reference.
+ // NOTE(review): the instance is mutable through the public setters - confirm nobody modifies it.
+ public static final ColumnStat UNKNOWN = new ColumnStat();
+
+ // Validators handed to Util.getDoublePropertyOrDefault() when a user-supplied value is parsed.
+ private static final Predicate<Double> DESIRED_NDV_PRED = (v) -> v >= -1L;
+ private static final Predicate<Double> DESIRED_AVG_SIZE_PRED = (v) -> (v == -1) || (v >= 0);
+ private static final Predicate<Double> DESIRED_MAX_SIZE_PRED = (v) -> v >= -1L;
+ private static final Predicate<Double> DESIRED_NUM_NULLS_PRED = (v) -> v >= -1L;
+
+ // Column types for which updateStats() does not convert min/max into a numeric bound.
+ private static final Set<Type> MAX_MIN_UNSUPPORTED_TYPE = new HashSet<>();
+
+ static {
+ MAX_MIN_UNSUPPORTED_TYPE.add(Type.VARCHAR);
+ MAX_MIN_UNSUPPORTED_TYPE.add(Type.CHAR);
+ MAX_MIN_UNSUPPORTED_TYPE.add(Type.HLL);
+ MAX_MIN_UNSUPPORTED_TYPE.add(Type.BITMAP);
+ MAX_MIN_UNSUPPORTED_TYPE.add(Type.ARRAY);
+ MAX_MIN_UNSUPPORTED_TYPE.add(Type.STRUCT);
+ MAX_MIN_UNSUPPORTED_TYPE.add(Type.MAP);
+ }
+
+ // The four counters start at -1; mergeColumnStats() treats -1 as "not set".
+ private double ndv = -1;
+ private double avgSizeByte = -1;
+ private double maxSizeByte = -1;
+ private double numNulls = -1;
+ // Min/max are kept as doubles and start as NaN.
+ private double minValue = Double.NaN;
+ private double maxValue = Double.NaN;
+ // For display only.
+ private LiteralExpr minExpr;
+ private LiteralExpr maxExpr;
+
+    /**
+     * Builds a placeholder statistic: ndv, avg size and max size are 1, the null count is 0,
+     * and min/max keep their initial NaN value.
+     *
+     * @return a new, independent ColumnStat instance
+     */
+    public static ColumnStat createDefaultColumnStats() {
+        // Argument order: ndv, avgSizeByte, maxSizeByte, numNulls, minValue, maxValue.
+        return new ColumnStat(1, 1, 1, 0, Double.NaN, Double.NaN);
+    }
+
+ /**
+ * Tells whether {@code stats} is the shared {@link #UNKNOWN} marker.
+ * The check is a reference comparison, so any other instance (including a freshly
+ * constructed one whose fields are all at their initial values) is reported as valid.
+ */
+ public static boolean isInvalid(ColumnStat stats) {
+ return stats == UNKNOWN;
+ }
+
+ /** Creates a statistic whose fields all keep their initial values (-1 counters, NaN min/max). */
+ public ColumnStat() {
+ }
+
+    /**
+     * Copy constructor: duplicates every numeric statistic of {@code other} and carries over its
+     * display-only min/max literals, so that a copy renders the same as its source in
+     * {@link #getShowInfo()}. The literal objects are shared with {@code other}, not cloned.
+     *
+     * @param other the statistics to copy from
+     */
+    public ColumnStat(ColumnStat other) {
+        this.ndv = other.ndv;
+        this.avgSizeByte = other.avgSizeByte;
+        this.maxSizeByte = other.maxSizeByte;
+        this.numNulls = other.numNulls;
+        this.minValue = other.minValue;
+        this.maxValue = other.maxValue;
+        // Without these two assignments a copy would print "N/A" for min/max.
+        this.minExpr = other.minExpr;
+        this.maxExpr = other.maxExpr;
+    }
+
+ /**
+ * Creates a statistic from explicit numeric values.
+ * The display-only min/max literals are left unset.
+ */
+ public ColumnStat(double ndv, double avgSizeByte,
+ double maxSizeByte, double numNulls, double minValue, double maxValue) {
+ this.ndv = ndv;
+ this.avgSizeByte = avgSizeByte;
+ this.maxSizeByte = maxSizeByte;
+ this.numNulls = numNulls;
+ this.minValue = minValue;
+ this.maxValue = maxValue;
+ }
+
+ /** Returns the number of distinct values; -1 is the initial value. */
+ public double getNdv() {
+ return ndv;
+ }
+
+ /** Returns the average size in bytes; -1 is the initial value. */
+ public double getAvgSizeByte() {
+ return avgSizeByte;
+ }
+
+ /** Returns the max size in bytes; -1 is the initial value. */
+ public double getMaxSizeByte() {
+ return maxSizeByte;
+ }
+
+ /** Returns the number of nulls; -1 is the initial value. */
+ public double getNumNulls() {
+ return numNulls;
+ }
+
+ /** Returns the min value as a double; NaN is the initial value. */
+ public double getMinValue() {
+ return minValue;
+ }
+
+ /** Returns the max value as a double; NaN is the initial value. */
+ public double getMaxValue() {
+ return maxValue;
+ }
+
+ // The setters below store the given value as-is; no validation is applied here.
+ public void setNdv(double ndv) {
+ this.ndv = ndv;
+ }
+
+ public void setAvgSizeByte(double avgSizeByte) {
+ this.avgSizeByte = avgSizeByte;
+ }
+
+ public void setMaxSizeByte(double maxSizeByte) {
+ this.maxSizeByte = maxSizeByte;
+ }
+
+ public void setNumNulls(double numNulls) {
+ this.numNulls = numNulls;
+ }
+
+ // Setting min/max through these setters does not touch the display-only literals.
+ public void setMinValue(double minValue) {
+ this.minValue = minValue;
+ }
+
+ public void setMaxValue(double maxValue) {
+ this.maxValue = maxValue;
+ }
+
+    /**
+     * Applies user-supplied statistic values to this column stat.
+     * <p>
+     * NDV, AVG_SIZE, MAX_SIZE and NUM_NULLS are parsed and validated by
+     * {@code Util.getDoublePropertyOrDefault}. For MIN_VALUE / MAX_VALUE the value is converted
+     * to a double and a literal is kept for display. Column types listed in
+     * {@code MAX_MIN_UNSUPPORTED_TYPE} get an open bound instead: negative infinity for the
+     * minimum and positive infinity for the maximum.
+     *
+     * @param columnType the type of the column the statistics belong to
+     * @param statsTypeToValue the statistic values to apply, keyed by statistic kind
+     * @throws AnalysisException if a value is invalid or the statistic kind is unknown
+     */
+    public void updateStats(Type columnType, Map<StatsType, String> statsTypeToValue) throws AnalysisException {
+        for (Map.Entry<StatsType, String> entry : statsTypeToValue.entrySet()) {
+            StatsType statsType = entry.getKey();
+            String value = entry.getValue();
+            switch (statsType) {
+                case NDV:
+                    ndv = Util.getDoublePropertyOrDefault(value, ndv,
+                            DESIRED_NDV_PRED, NDV + " should >= -1");
+                    break;
+                case AVG_SIZE:
+                    avgSizeByte = Util.getDoublePropertyOrDefault(value, avgSizeByte,
+                            DESIRED_AVG_SIZE_PRED, AVG_SIZE + " should (>=0) or (=-1)");
+                    break;
+                case MAX_SIZE:
+                    maxSizeByte = Util.getDoublePropertyOrDefault(value, maxSizeByte,
+                            DESIRED_MAX_SIZE_PRED, MAX_SIZE + " should >=-1");
+                    break;
+                case NUM_NULLS:
+                    numNulls = Util.getDoublePropertyOrDefault(value, numNulls,
+                            DESIRED_NUM_NULLS_PRED, NUM_NULLS + " should >=-1");
+                    break;
+                case MIN_VALUE:
+                    // The set holds column types, so the lookup key must be the column type.
+                    // Looking up the StatsType could never match and left this branch dead.
+                    // NOTE(review): the lookup relies on Type.equals(); confirm that parameterized
+                    // types such as VARCHAR(n) compare equal to the Type.VARCHAR entry.
+                    if (MAX_MIN_UNSUPPORTED_TYPE.contains(columnType)) {
+                        minValue = Double.NEGATIVE_INFINITY;
+                    } else {
+                        minExpr = readableValue(columnType, value);
+                        minValue = convertToDouble(columnType, value);
+                    }
+                    break;
+                case MAX_VALUE:
+                    if (MAX_MIN_UNSUPPORTED_TYPE.contains(columnType)) {
+                        // An unknown upper bound is +infinity, so that [min, max] stays non-empty.
+                        maxValue = Double.POSITIVE_INFINITY;
+                    } else {
+                        maxExpr = readableValue(columnType, value);
+                        maxValue = convertToDouble(columnType, value);
+                    }
+                    break;
+                default:
+                    throw new AnalysisException("Unknown stats type: " + statsType);
+            }
+        }
+    }
+
+    /**
+     * Renders this statistic as a row of strings in the order
+     * ndv, avg size, max size, num nulls, min value, max value.
+     * Min and max are shown through their display literals, or "N/A" when no literal is set.
+     *
+     * @return a new mutable list with six entries
+     */
+    public List<String> getShowInfo() {
+        String minText = minExpr == null ? "N/A" : minExpr.toSql();
+        String maxText = maxExpr == null ? "N/A" : maxExpr.toSql();
+        return Lists.newArrayList(
+                Double.toString(ndv),
+                Double.toString(avgSizeByte),
+                Double.toString(maxSizeByte),
+                Double.toString(numNulls),
+                minText,
+                maxText);
+    }
+
+    /**
+     * Converts the textual min/max value of a column into the double stored in this statistic.
+     * <p>
+     * Booleans map to 1.0 / 0.0, numeric types are parsed with {@link Double#parseDouble}, and
+     * date/datetime values are converted to epoch seconds in the JVM's default time zone
+     * (dates at the start of the day).
+     *
+     * @param type the column type; must be a scalar type
+     * @param columnValue the textual value to convert
+     * @return the numeric representation of {@code columnValue}
+     * @throws AnalysisException if the type is unsupported or the value cannot be parsed;
+     *         the original failure is kept as the cause
+     */
+    private double convertToDouble(Type type, String columnValue) throws AnalysisException {
+        Preconditions.checkArgument(type.isScalarType());
+        try {
+            PrimitiveType primitiveType = ((ScalarType) type).getPrimitiveType();
+            switch (primitiveType) {
+                case BOOLEAN:
+                    return Boolean.parseBoolean(columnValue) ? 1.0 : 0.0;
+                case TINYINT:
+                case SMALLINT:
+                case INT:
+                case BIGINT:
+                case LARGEINT:
+                case FLOAT:
+                case DOUBLE:
+                case DECIMALV2:
+                case DECIMAL32:
+                case DECIMAL64:
+                case DECIMAL128:
+                    // Wide integers and decimals may lose precision once stored as a double.
+                    return Double.parseDouble(columnValue);
+                case DATE:
+                case DATEV2:
+                    // A date-only pattern carries no time fields, so it has to be parsed as a
+                    // LocalDate; LocalDateTime.parse() rejects such input.
+                    DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
+                    return LocalDate
+                            .parse(columnValue, dateFormatter)
+                            .atStartOfDay(ZoneId.systemDefault()).toInstant().getEpochSecond();
+                case DATETIMEV2:
+                case DATETIME:
+                    // NOTE(review): this pattern has no fractional seconds - confirm that
+                    // DATETIMEV2 values never carry them here.
+                    DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+                    return LocalDateTime
+                            .parse(columnValue, dateTimeFormatter)
+                            .atZone(ZoneId.systemDefault()).toInstant().getEpochSecond();
+                case CHAR:
+                case VARCHAR:
+                case HLL:
+                case BITMAP:
+                case ARRAY:
+                case MAP:
+                case STRUCT:
+                default:
+                    throw new AnalysisException("Unsupported setting this type: " + type + " of min max value");
+            }
+        } catch (Exception e) {
+            // Parse failures (NumberFormatException, DateTimeParseException, ...) are reported
+            // to the caller as AnalysisException.
+            throw new AnalysisException(e.getMessage(), e);
+        }
+
+    }
+
+ /** Returns a new ColumnStat built from this one through the copy constructor. */
+ public ColumnStat copy() {
+ return new ColumnStat(this);
+ }
+
+    /**
+     * Scales ndv and the null count by {@code selectivity} in place and caps both at {@code rowCount}.
+     * The scaled null count is rounded up to a whole number.
+     * NOTE(review): a counter still at its initial -1 is scaled like any other value - confirm
+     * callers only pass statistics that have been set.
+     *
+     * @param selectivity the factor applied to ndv and the null count
+     * @param rowCount the upper bound for both values after scaling
+     * @return this instance, to allow chaining
+     */
+    public ColumnStat updateBySelectivity(double selectivity, double rowCount) {
+        double scaledNdv = ndv * selectivity;
+        double scaledNumNulls = (long) Math.ceil(numNulls * selectivity);
+        ndv = scaledNdv > rowCount ? rowCount : scaledNdv;
+        numNulls = scaledNumNulls > rowCount ? rowCount : scaledNumNulls;
+        return this;
+    }
+
+    /**
+     * Tells whether the [min, max] range of this statistic overlaps the range of {@code another}.
+     * If any of the four bounds is NaN the result is {@code false}.
+     *
+     * @param another the statistic to compare against
+     * @return {@code true} if the larger minimum does not exceed the smaller maximum
+     */
+    public boolean hasIntersect(ColumnStat another) {
+        double lowerBound = Math.max(this.getMinValue(), another.getMinValue());
+        double upperBound = Math.min(this.getMaxValue(), another.getMaxValue());
+        return lowerBound <= upperBound;
+    }
+
+ /**
+ * Returns a new ColumnStat whose fields all keep their initial values
+ * (-1 for the counters, NaN for min/max).
+ */
+ public static ColumnStat getDefaultColumnStats() {
+ return new ColumnStat();
+ }
+
+    /**
+     * Combines two column statistics into a new instance; neither argument is modified.
+     * <p>
+     * A counter equal to -1 is treated as "not set": ndv and num_nulls are summed and avg_size is
+     * averaged when both sides are set, otherwise the side that is set wins. max_size takes the
+     * right value when the left one is -1, otherwise the larger of the two. min_value and
+     * max_value are combined with {@link Math#min} and {@link Math#max}, so a NaN on either side
+     * yields NaN. The display-only min/max literals are not carried into the result.
+     *
+     * @param left statistics to be merged
+     * @param right statistics to be merged
+     * @return a newly created ColumnStat holding the merged values
+     */
+    public static ColumnStat mergeColumnStats(ColumnStat left, ColumnStat right) {
+        // ndv: sum of both sides when both are set
+        double mergedNdv = left.getNdv();
+        double rightNdv = right.getNdv();
+        if (mergedNdv == -1) {
+            mergedNdv = rightNdv;
+        } else if (rightNdv != -1) {
+            mergedNdv += rightNdv;
+        }
+
+        // avg_size: mean of both sides when both are set
+        double mergedAvgSize = left.getAvgSizeByte();
+        double rightAvgSize = right.getAvgSizeByte();
+        if (mergedAvgSize == -1) {
+            mergedAvgSize = rightAvgSize;
+        } else if (rightAvgSize != -1) {
+            mergedAvgSize = (mergedAvgSize + rightAvgSize) / 2;
+        }
+
+        // max_size: larger of both sides
+        double leftMaxSize = left.getMaxSizeByte();
+        double rightMaxSize = right.getMaxSizeByte();
+        double mergedMaxSize = leftMaxSize == -1 ? rightMaxSize : Math.max(leftMaxSize, rightMaxSize);
+
+        // num_nulls: sum of both sides when both are set
+        double mergedNumNulls = left.getNumNulls();
+        double rightNumNulls = right.getNumNulls();
+        if (mergedNumNulls == -1) {
+            mergedNumNulls = rightNumNulls;
+        } else if (rightNumNulls != -1) {
+            mergedNumNulls += rightNumNulls;
+        }
+
+        // min_value / max_value: widest range covering both sides
+        double mergedMin = Math.min(left.getMinValue(), right.getMinValue());
+        double mergedMax = Math.max(right.getMaxValue(), left.getMaxValue());
+
+        return new ColumnStat(mergedNdv, mergedAvgSize, mergedMaxSize, mergedNumNulls, mergedMin, mergedMax);
+    }
+
+ /**
+ * Builds the literal used to display a min/max value for a column of the given type.
+ * CHAR and VARCHAR values are accepted here (after a length check) even though
+ * convertToDouble() rejects those types.
+ *
+ * @param type the column type; must be a scalar type
+ * @param columnValue the textual min/max value
+ * @return a literal expression representing {@code columnValue}
+ * @throws AnalysisException if the type is unsupported or a string value is longer than the column
+ */
+ private LiteralExpr readableValue(Type type, String columnValue) throws AnalysisException {
+ Preconditions.checkArgument(type.isScalarType());
+ ScalarType scalarType = (ScalarType) type;
+
+ // Constructing the literal also serves as the validity check for the value:
+ // if it is not valid, some literal constructors will throw AnalysisException.
+ PrimitiveType primitiveType = scalarType.getPrimitiveType();
+ switch (primitiveType) {
+ case BOOLEAN:
+ return new BoolLiteral(columnValue);
+ case TINYINT:
+ case SMALLINT:
+ case INT:
+ case BIGINT:
+ return new IntLiteral(columnValue, type);
+ case LARGEINT:
+ return new LargeIntLiteral(columnValue);
+ case FLOAT:
+ // the min max value will lose precision when value type is double.
+ case DOUBLE:
+ return new FloatLiteral(columnValue);
+ case DECIMALV2:
+ case DECIMAL32:
+ case DECIMAL64:
+ case DECIMAL128:
+ DecimalLiteral decimalLiteral = new DecimalLiteral(columnValue);
+ // Checked against the precision and scale declared on the column type.
+ decimalLiteral.checkPrecisionAndScale(scalarType.getScalarPrecision(), scalarType.getScalarScale());
+ return decimalLiteral;
+ case DATE:
+ case DATETIME:
+ case DATEV2:
+ case DATETIMEV2:
+ return new DateLiteral(columnValue, type);
+ case CHAR:
+ case VARCHAR:
+ if (columnValue.length() > scalarType.getLength()) {
+ throw new AnalysisException("Min/Max value is longer than length of column type: "
+ + columnValue);
+ }
+ return new StringLiteral(columnValue);
+ case HLL:
+ case BITMAP:
+ case ARRAY:
+ case MAP:
+ case STRUCT:
+ default:
+ throw new AnalysisException("Unsupported setting this type: " + type + " of min max value");
+ }
+ }
+
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java
deleted file mode 100644
index e4628c8ea6..0000000000
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStats.java
+++ /dev/null
@@ -1,320 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.statistics;
-
-import org.apache.doris.analysis.BoolLiteral;
-import org.apache.doris.analysis.DateLiteral;
-import org.apache.doris.analysis.DecimalLiteral;
-import org.apache.doris.analysis.FloatLiteral;
-import org.apache.doris.analysis.IntLiteral;
-import org.apache.doris.analysis.LargeIntLiteral;
-import org.apache.doris.analysis.LiteralExpr;
-import org.apache.doris.analysis.NullLiteral;
-import org.apache.doris.analysis.StringLiteral;
-import org.apache.doris.catalog.PrimitiveType;
-import org.apache.doris.catalog.ScalarType;
-import org.apache.doris.catalog.Type;
-import org.apache.doris.common.AnalysisException;
-import org.apache.doris.common.util.Util;
-
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-
-import java.util.List;
-import java.util.Map;
-import java.util.function.Predicate;
-
-/**
- * There are the statistics of column.
- * The column stats are mainly used to provide input for the Optimizer's cost model.
- * <p>
- * The description of column stats are following:
- * 1. @ndv: The number distinct values of column.
- * 2. @avgSize: The average size of column. The unit is bytes.
- * 3. @maxSize: The max size of column. The unit is bytes.
- * 4. @numNulls: The number of nulls.
- * 5. @minValue: The min value of column.
- * 6. @maxValue: The max value of column.
- * <p>
- * The granularity of the statistics is whole table.
- * For example:
- * "@ndv = 10" means that the number distinct values is 10 in the whole table or partition.
- * <p>
- * After the statistics task is successfully completed, update the ColumnStats,
- * ColumnStats should not be updated in any other way.
- */
-public class ColumnStats {
- public static final StatsType NDV = StatsType.NDV;
- public static final StatsType AVG_SIZE = StatsType.AVG_SIZE;
- public static final StatsType MAX_SIZE = StatsType.MAX_SIZE;
- public static final StatsType NUM_NULLS = StatsType.NUM_NULLS;
- public static final StatsType MIN_VALUE = StatsType.MIN_VALUE;
- public static final StatsType MAX_VALUE = StatsType.MAX_VALUE;
-
- private static final Predicate<Long> DESIRED_NDV_PRED = (v) -> v >= -1L;
- private static final Predicate<Float> DESIRED_AVG_SIZE_PRED = (v) -> (v == -1) || (v >= 0);
- private static final Predicate<Long> DESIRED_MAX_SIZE_PRED = (v) -> v >= -1L;
- private static final Predicate<Long> DESIRED_NUM_NULLS_PRED = (v) -> v >= -1L;
-
- private long ndv = -1;
- private float avgSize = -1; // in bytes
- private long maxSize = -1; // in bytes
- private long numNulls = -1;
-
- private LiteralExpr minValue = new NullLiteral();
- private LiteralExpr maxValue = new NullLiteral();
-
- /**
- * Return default column statistic.
- */
- public static ColumnStats getDefaultColumnStats() {
- return new ColumnStats();
- }
-
- /**
- * Merge column statistics(the original statistics should not be modified)
- *
- * @param left statistics to be merged
- * @param right statistics to be merged
- */
- public static ColumnStats mergeColumnStats(ColumnStats left, ColumnStats right) {
- // merge ndv
- long leftNdv = left.getNdv();
- long rightNdv = right.getNdv();
-
- if (leftNdv == -1) {
- leftNdv = rightNdv;
- } else {
- leftNdv = rightNdv != -1 ? (leftNdv + rightNdv) : leftNdv;
- }
-
- // merge avg_size
- float leftAvgSize = left.getAvgSize();
- float rightAvgSize = right.getAvgSize();
- if (leftAvgSize == -1) {
- leftAvgSize = rightAvgSize;
- } else {
- leftAvgSize = rightAvgSize != -1 ? ((leftAvgSize + rightAvgSize) / 2) : leftAvgSize;
- }
-
- // merge max_size
- long leftMaxSize = left.getMaxSize();
- long rightMaxSize = right.getMaxSize();
- if (leftMaxSize == -1) {
- leftMaxSize = rightMaxSize;
- } else {
- leftMaxSize = Math.max(leftMaxSize, rightMaxSize);
- }
-
- // merge num_nulls
- long leftNumNulls = left.getNumNulls();
- long rightNumNulls = right.getNumNulls();
- if (leftNumNulls == -1) {
- leftNumNulls = rightNumNulls;
- } else {
- leftNumNulls = rightNumNulls != -1 ? (leftNumNulls + rightNumNulls) : leftNumNulls;
- }
-
- // merge min_value
- LiteralExpr leftMinValue = left.getMinValue();
- LiteralExpr rightMinValue = right.getMinValue();
- if (leftMinValue == null) {
- leftMinValue = rightMinValue;
- } else {
- leftMinValue = leftMinValue.compareTo(rightMinValue) > 0 ? rightMinValue : leftMinValue;
- }
-
- // merge max_value
- LiteralExpr leftMaxValue = left.getMaxValue();
- LiteralExpr rightMaxValue = right.getMaxValue();
- if (leftMaxValue == null) {
- leftMaxValue = rightMaxValue;
- } else {
- leftMaxValue = leftMaxValue.compareTo(rightMaxValue) < 0 ? rightMaxValue : leftMaxValue;
- }
-
- // generate the new merged-statistics
- return new ColumnStats(leftNdv, leftAvgSize, leftMaxSize, leftNumNulls, leftMinValue, leftMaxValue);
- }
-
- public ColumnStats() {
- }
-
- public ColumnStats(long ndv, float avgSize, long maxSize,
- long numNulls, LiteralExpr minValue, LiteralExpr maxValue) {
- this.ndv = ndv;
- this.avgSize = avgSize;
- this.maxSize = maxSize;
- this.numNulls = numNulls;
- this.minValue = minValue;
- this.maxValue = maxValue;
- }
-
- public ColumnStats(ColumnStats other) {
- this.ndv = other.ndv;
- this.avgSize = other.avgSize;
- this.maxSize = other.maxSize;
- this.numNulls = other.numNulls;
- if (other.minValue != null && !(other.minValue instanceof NullLiteral)) {
- this.minValue = (LiteralExpr) other.minValue.clone();
- }
- if (other.maxValue != null && !(other.minValue instanceof NullLiteral)) {
- this.maxValue = (LiteralExpr) other.maxValue.clone();
- }
- }
-
- public long getNdv() {
- return ndv;
- }
-
- public float getAvgSize() {
- return avgSize;
- }
-
- public long getMaxSize() {
- return maxSize;
- }
-
- public long getNumNulls() {
- return numNulls;
- }
-
- public LiteralExpr getMinValue() {
- return minValue;
- }
-
- public LiteralExpr getMaxValue() {
- return maxValue;
- }
-
- public List<String> getShowInfo() {
- List<String> result = Lists.newArrayList();
- result.add(Long.toString(ndv));
- result.add(Float.toString(avgSize));
- result.add(Long.toString(maxSize));
- result.add(Long.toString(numNulls));
- if (minValue != null) {
- result.add(minValue.getStringValue());
- } else {
- result.add("N/A");
- }
- if (maxValue != null) {
- result.add(maxValue.getStringValue());
- } else {
- result.add("N/A");
- }
- return result;
- }
-
- public ColumnStats copy() {
- return new ColumnStats(this);
- }
-
- /**
- * After the statistics task is successfully completed, update the statistics of the column,
- * statistics should not be updated in any other way.
- */
- public void updateStats(Type columnType, Map<StatsType, String> statsTypeToValue)
- throws AnalysisException {
- for (Map.Entry<StatsType, String> entry : statsTypeToValue.entrySet()) {
- StatsType statsType = entry.getKey();
- switch (statsType) {
- case NDV:
- ndv = Util.getLongPropertyOrDefault(entry.getValue(), ndv,
- DESIRED_NDV_PRED, NDV + " should >= -1");
- break;
- case AVG_SIZE:
- avgSize = Util.getFloatPropertyOrDefault(entry.getValue(), avgSize,
- DESIRED_AVG_SIZE_PRED, AVG_SIZE + " should (>=0) or (=-1)");
- break;
- case MAX_SIZE:
- maxSize = Util.getLongPropertyOrDefault(entry.getValue(), maxSize,
- DESIRED_MAX_SIZE_PRED, MAX_SIZE + " should >=-1");
- break;
- case NUM_NULLS:
- numNulls = Util.getLongPropertyOrDefault(entry.getValue(), numNulls,
- DESIRED_NUM_NULLS_PRED, NUM_NULLS + " should >=-1");
- break;
- case MIN_VALUE:
- minValue = validateColumnValue(columnType, entry.getValue());
- break;
- case MAX_VALUE:
- maxValue = validateColumnValue(columnType, entry.getValue());
- break;
- default:
- throw new AnalysisException("Unknown stats type: " + statsType);
- }
- }
- }
-
- // TODO: The generated statistics should not be modified
- public void updateBySelectivity(double selectivity) {
- ndv = (long) Math.ceil(ndv * selectivity);
- numNulls = (long) Math.ceil(numNulls * selectivity);
- }
-
- private LiteralExpr validateColumnValue(Type type, String columnValue) throws AnalysisException {
- Preconditions.checkArgument(type.isScalarType());
- ScalarType scalarType = (ScalarType) type;
-
- // check if default value is valid.
- // if not, some literal constructor will throw AnalysisException
- PrimitiveType primitiveType = scalarType.getPrimitiveType();
- switch (primitiveType) {
- case BOOLEAN:
- return new BoolLiteral(columnValue);
- case TINYINT:
- case SMALLINT:
- case INT:
- case BIGINT:
- return new IntLiteral(columnValue, type);
- case LARGEINT:
- return new LargeIntLiteral(columnValue);
- case FLOAT:
- // the min max value will loose precision when value type is double.
- case DOUBLE:
- return new FloatLiteral(columnValue);
- case DECIMALV2:
- case DECIMAL32:
- case DECIMAL64:
- case DECIMAL128:
- DecimalLiteral decimalLiteral = new DecimalLiteral(columnValue);
- decimalLiteral.checkPrecisionAndScale(scalarType.getScalarPrecision(), scalarType.getScalarScale());
- return decimalLiteral;
- case DATE:
- case DATETIME:
- case DATEV2:
- case DATETIMEV2:
- return new DateLiteral(columnValue, type);
- case CHAR:
- case VARCHAR:
- if (columnValue.length() > scalarType.getLength()) {
- throw new AnalysisException("Min/Max value is longer than length of column type: "
- + columnValue);
- }
- return new StringLiteral(columnValue);
- case HLL:
- case BITMAP:
- case ARRAY:
- case MAP:
- case STRUCT:
- default:
- throw new AnalysisException("Unsupported setting this type: " + type + " of min max value");
- }
- }
-}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/CrossJoinStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/CrossJoinStatsDerive.java
index fea8425ff4..b943543dc9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/CrossJoinStatsDerive.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/CrossJoinStatsDerive.java
@@ -40,13 +40,13 @@ public class CrossJoinStatsDerive extends BaseStatsDerive {
if (childrenStatsResult.get(0).getRowCount() == -1 || childrenStatsResult.get(1).getRowCount() == -1) {
rowCount = -1;
} else {
- rowCount = CheckedMath.checkedMultiply(childrenStatsResult.get(0).getRowCount(),
- childrenStatsResult.get(1).getRowCount());
+ rowCount = CheckedMath.checkedMultiply((long) childrenStatsResult.get(0).getRowCount(),
+ (long) childrenStatsResult.get(1).getRowCount());
applyConjunctsSelectivity();
capRowCountAtLimit();
}
if (LOG.isDebugEnabled()) {
- LOG.debug("stats CrossJoin: rowCount={}", Long.toString(rowCount));
+ LOG.debug("stats CrossJoin: rowCount={}", Double.toString(rowCount));
}
return rowCount;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java
index 6288d1f6a9..268f9240fa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java
@@ -31,7 +31,7 @@ public class ExchangeStatsDerive extends BaseStatsDerive {
@Override
protected long deriveRowCount() {
Preconditions.checkState(!childrenStatsResult.isEmpty());
- rowCount = childrenStatsResult.get(0).getRowCount();
+ rowCount = (long) childrenStatsResult.get(0).getRowCount();
capRowCountAtLimit();
return rowCount;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HashJoinStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HashJoinStatsDerive.java
index 24b9e102e4..bb4025d86c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HashJoinStatsDerive.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HashJoinStatsDerive.java
@@ -96,7 +96,7 @@ public class HashJoinStatsDerive extends BaseStatsDerive {
Preconditions.checkState(joinOp.isSemiJoin());
// Return -1 if the rowCount of the returned side is unknown.
- long rowCount;
+ double rowCount;
if (joinOp == JoinOperator.RIGHT_SEMI_JOIN
|| joinOp == JoinOperator.RIGHT_ANTI_JOIN) {
if (childrenStatsResult.get(1).getRowCount() == -1) {
@@ -111,9 +111,9 @@ public class HashJoinStatsDerive extends BaseStatsDerive {
}
double minSelectivity = 1.0;
for (Expr eqJoinPredicate : eqJoinConjuncts) {
- long lhsNdv = getNdv(eqJoinPredicate.getChild(0));
+ double lhsNdv = getNdv(eqJoinPredicate.getChild(0));
lhsNdv = Math.min(lhsNdv, childrenStatsResult.get(0).getRowCount());
- long rhsNdv = getNdv(eqJoinPredicate.getChild(1));
+ double rhsNdv = getNdv(eqJoinPredicate.getChild(1));
rhsNdv = Math.min(rhsNdv, childrenStatsResult.get(1).getRowCount());
// Skip conjuncts with unknown NDV on either side.
@@ -174,8 +174,8 @@ public class HashJoinStatsDerive extends BaseStatsDerive {
Preconditions.checkState(joinOp.isInnerJoin() || joinOp.isOuterJoin());
Preconditions.checkState(childrenStatsResult.size() == 2);
- long lhsCard = childrenStatsResult.get(0).getRowCount();
- long rhsCard = childrenStatsResult.get(1).getRowCount();
+ long lhsCard = (long) childrenStatsResult.get(0).getRowCount();
+ long rhsCard = (long) childrenStatsResult.get(1).getRowCount();
if (lhsCard == -1 || rhsCard == -1) {
return lhsCard;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
index bbe1dd2f69..7e47a41e0c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java
@@ -84,7 +84,7 @@ public class OlapScanStatsDerive extends BaseStatsDerive {
if (node.getTupleDesc() != null
&& node.getTupleDesc().getTable() != null) {
long tableId = node.getTupleDesc().getTable().getId();
- inputRowCount = Env.getCurrentEnv().getStatisticsManager().getStatistics().getTableStats(tableId)
+ inputRowCount = (long) Env.getCurrentEnv().getStatisticsManager().getStatistics().getTableStats(tableId)
.getRowCount();
}
for (SlotDescriptor slot : node.getTupleDesc().getSlots()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java
index 1ef48fc0c0..248d07b24b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionStats.java
@@ -58,7 +58,7 @@ public class PartitionStats {
private long rowCount = -1;
private long dataSize = -1;
- private final Map<String, ColumnStats> nameToColumnStats = Maps.newConcurrentMap();
+ private final Map<String, ColumnStat> nameToColumnStats = Maps.newConcurrentMap();
/**
* Return a default partition statistic.
@@ -91,20 +91,20 @@ public class PartitionStats {
this.dataSize = dataSize;
}
- public Map<String, ColumnStats> getNameToColumnStats() {
+ public Map<String, ColumnStat> getNameToColumnStats() {
return nameToColumnStats;
}
- public ColumnStats getColumnStats(String columnName) {
+ public ColumnStat getColumnStats(String columnName) {
return nameToColumnStats.get(columnName);
}
/**
* If the column statistics do not exist, the default statistics will be returned.
*/
- public ColumnStats getColumnStatsOrDefault(String columnName) {
+ public ColumnStat getColumnStatsOrDefault(String columnName) {
return nameToColumnStats.getOrDefault(columnName,
- ColumnStats.getDefaultColumnStats());
+ ColumnStat.getDefaultColumnStats());
}
/**
@@ -142,8 +142,8 @@ public class PartitionStats {
public void updateColumnStats(String columnName,
Type columnType,
Map<StatsType, String> statsTypeToValue) throws AnalysisException {
- ColumnStats columnStats = getNotNullColumnStats(columnName);
- columnStats.updateStats(columnType, statsTypeToValue);
+ ColumnStat columnStat = getNotNullColumnStats(columnName);
+ columnStat.updateStats(columnType, statsTypeToValue);
}
/**
@@ -152,12 +152,12 @@ public class PartitionStats {
* @param columnName column name
* @return @ColumnStats
*/
- private ColumnStats getNotNullColumnStats(String columnName) {
- ColumnStats columnStats = nameToColumnStats.get(columnName);
- if (columnStats == null) {
- columnStats = new ColumnStats();
- nameToColumnStats.put(columnName, columnStats);
+ public ColumnStat getNotNullColumnStats(String columnName) {
+ ColumnStat columnStat = nameToColumnStats.get(columnName);
+ if (columnStat == null) {
+ columnStat = new ColumnStat();
+ nameToColumnStats.put(columnName, columnStat);
}
- return columnStats;
+ return columnStat;
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/SelectStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/SelectStatsDerive.java
index 09a4c4c5df..a4274e4d59 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/SelectStatsDerive.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/SelectStatsDerive.java
@@ -33,7 +33,7 @@ public class SelectStatsDerive extends BaseStatsDerive {
@Override
protected long deriveRowCount() {
Preconditions.checkState(!childrenStatsResult.isEmpty());
- rowCount = childrenStatsResult.get(0).getRowCount();
+ rowCount = (long) childrenStatsResult.get(0).getRowCount();
applyConjunctsSelectivity();
capRowCountAtLimit();
return rowCount;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 0f5609ef64..ef30142e62 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -102,9 +102,9 @@ public class Statistics {
* @return column name and @ColumnStats
* @throws AnalysisException if columns stats not exists
*/
- public Map<String, ColumnStats> getColumnStats(long tableId) throws AnalysisException {
+ public Map<String, ColumnStat> getColumnStats(long tableId) throws AnalysisException {
TableStats tableStats = getTableStats(tableId);
- Map<String, ColumnStats> nameToColumnStats = tableStats.getNameToColumnStats();
+ Map<String, ColumnStat> nameToColumnStats = tableStats.getNameToColumnStats();
if (nameToColumnStats == null) {
throw new AnalysisException("Table " + tableId + " has no column statistics");
}
@@ -119,7 +119,7 @@ public class Statistics {
* @return column name and @ColumnStats
* @throws AnalysisException if column stats not exists
*/
- public Map<String, ColumnStats> getColumnStats(long tableId, String partitionName) throws AnalysisException {
+ public Map<String, ColumnStat> getColumnStats(long tableId, String partitionName) throws AnalysisException {
Map<String, PartitionStats> partitionStats = getPartitionStats(tableId, partitionName);
PartitionStats partitionStat = partitionStats.get(partitionName);
if (partitionStat == null) {
@@ -160,7 +160,7 @@ public class Statistics {
}
// TODO: mock statistics need to be removed in the future
- public void mockTableStatsWithRowCount(long tableId, long rowCount) {
+ public void mockTableStatsWithRowCount(long tableId, double rowCount) {
TableStats tableStats = idToTableStats.get(tableId);
if (tableStats == null) {
tableStats = new TableStats(rowCount, 1);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java
index 3e58ca3a39..e5bef86409 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java
@@ -334,7 +334,7 @@ public class StatisticsManager {
private List<List<String>> showColumnStats(long tableId) throws AnalysisException {
List<List<String>> result = Lists.newArrayList();
- Map<String, ColumnStats> columnStats = statistics.getColumnStats(tableId);
+ Map<String, ColumnStat> columnStats = statistics.getColumnStats(tableId);
columnStats.forEach((key, stats) -> {
List<String> row = Lists.newArrayList();
row.add(key);
@@ -346,7 +346,7 @@ public class StatisticsManager {
private List<List<String>> showColumnStats(long tableId, String partitionName) throws AnalysisException {
List<List<String>> result = Lists.newArrayList();
- Map<String, ColumnStats> columnStats = statistics.getColumnStats(tableId, partitionName);
+ Map<String, ColumnStat> columnStats = statistics.getColumnStats(tableId, partitionName);
columnStats.forEach((key, stats) -> {
List<String> row = Lists.newArrayList();
row.add(key);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
index 6b392d4296..581750a760 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java
@@ -31,7 +31,7 @@ import java.util.Map.Entry;
* This structure is maintained in each operator to store the statistical information results obtained by the operator.
*/
public class StatsDeriveResult {
- private long rowCount = -1;
+ private double rowCount = -1;
// The data size of the corresponding column in the operator
// The actual key is slotId
private final Map<Id, Float> columnIdToDataSize = Maps.newHashMap();
@@ -39,17 +39,17 @@ public class StatsDeriveResult {
// The actual key is slotId
private final Map<Id, Long> columnIdToNdv = Maps.newHashMap();
- private Map<Slot, ColumnStats> slotToColumnStats;
+ private Map<Slot, ColumnStat> slotToColumnStats;
public boolean isReduced = false;
public int width = 1;
- public StatsDeriveResult(long rowCount, Map<Slot, ColumnStats> slotToColumnStats) {
+ public StatsDeriveResult(double rowCount, Map<Slot, ColumnStat> slotToColumnStats) {
this.rowCount = rowCount;
this.slotToColumnStats = slotToColumnStats;
}
- public StatsDeriveResult(long rowCount, Map<Id, Float> columnIdToDataSize, Map<Id, Long> columnIdToNdv) {
+ public StatsDeriveResult(double rowCount, Map<Id, Float> columnIdToDataSize, Map<Id, Long> columnIdToNdv) {
this.rowCount = rowCount;
this.columnIdToDataSize.putAll(columnIdToDataSize);
this.columnIdToNdv.putAll(columnIdToNdv);
@@ -60,15 +60,15 @@ public class StatsDeriveResult {
this.columnIdToDataSize.putAll(another.columnIdToDataSize);
this.columnIdToNdv.putAll(another.columnIdToNdv);
slotToColumnStats = new HashMap<>();
- for (Entry<Slot, ColumnStats> entry : another.slotToColumnStats.entrySet()) {
+ for (Entry<Slot, ColumnStat> entry : another.slotToColumnStats.entrySet()) {
slotToColumnStats.put(entry.getKey(), entry.getValue().copy());
}
this.isReduced = another.isReduced;
this.width = another.width;
}
- public float computeSize() {
- return Math.max(1, columnIdToDataSize.values().stream().reduce((float) 0, Float::sum)) * rowCount;
+ public double computeSize() {
+ return Math.max(1, columnIdToDataSize.values().stream().reduce(0F, Float::sum)) * rowCount;
}
/**
@@ -77,7 +77,7 @@ public class StatsDeriveResult {
* @param slotIds all input columns.
* @return sum data size.
*/
- public float computeColumnSize(List<Id> slotIds) {
+ public double computeColumnSize(List<Id> slotIds) {
float count = 0;
boolean exist = false;
@@ -93,11 +93,12 @@ public class StatsDeriveResult {
return count * rowCount;
}
- public void setRowCount(long rowCount) {
+ public StatsDeriveResult setRowCount(double rowCount) {
this.rowCount = rowCount;
+ return this;
}
- public long getRowCount() {
+ public double getRowCount() {
return rowCount;
}
@@ -109,18 +110,18 @@ public class StatsDeriveResult {
return columnIdToDataSize;
}
- public Map<Slot, ColumnStats> getSlotToColumnStats() {
+ public Map<Slot, ColumnStat> getSlotToColumnStats() {
return slotToColumnStats;
}
- public void setSlotToColumnStats(Map<Slot, ColumnStats> slotToColumnStats) {
+ public void setSlotToColumnStats(Map<Slot, ColumnStat> slotToColumnStats) {
this.slotToColumnStats = slotToColumnStats;
}
public StatsDeriveResult updateRowCountBySelectivity(double selectivity) {
rowCount *= selectivity;
- for (Entry<Slot, ColumnStats> entry : slotToColumnStats.entrySet()) {
- entry.getValue().updateBySelectivity(selectivity);
+ for (Entry<Slot, ColumnStat> entry : slotToColumnStats.entrySet()) {
+ entry.getValue().updateBySelectivity(selectivity, rowCount);
}
return this;
}
@@ -129,15 +130,15 @@ public class StatsDeriveResult {
if (limit > 0 && rowCount > 0 && rowCount > limit) {
double selectivity = ((double) limit) / rowCount;
rowCount = limit;
- for (Entry<Slot, ColumnStats> entry : slotToColumnStats.entrySet()) {
- entry.getValue().updateBySelectivity(selectivity);
+ for (Entry<Slot, ColumnStat> entry : slotToColumnStats.entrySet()) {
+ entry.getValue().updateBySelectivity(selectivity, rowCount);
}
}
return this;
}
public StatsDeriveResult merge(StatsDeriveResult other) {
- for (Entry<Slot, ColumnStats> entry : other.getSlotToColumnStats().entrySet()) {
+ for (Entry<Slot, ColumnStat> entry : other.getSlotToColumnStats().entrySet()) {
this.slotToColumnStats.put(entry.getKey(), entry.getValue().copy());
}
return this;
@@ -163,4 +164,22 @@ public class StatsDeriveResult {
return stats.toString();
}
}
+
+ public StatsDeriveResult updateRowCountOnCopy(double selectivity) {
+ StatsDeriveResult copy = new StatsDeriveResult(this);
+ copy.setRowCount(rowCount * selectivity);
+ for (Entry<Slot, ColumnStat> entry : copy.slotToColumnStats.entrySet()) {
+ entry.getValue().updateBySelectivity(selectivity, rowCount);
+ }
+ return copy;
+ }
+
+ public StatsDeriveResult addColumnStats(Slot slot, ColumnStat stats) {
+ slotToColumnStats.put(slot, stats);
+ return this;
+ }
+
+ public ColumnStat getColumnStatsBySlot(Slot slot) {
+ return slotToColumnStats.get(slot);
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableFunctionStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableFunctionStatsDerive.java
index 89abbccf33..80a85f066c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableFunctionStatsDerive.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableFunctionStatsDerive.java
@@ -32,7 +32,7 @@ public class TableFunctionStatsDerive extends BaseStatsDerive {
protected long deriveRowCount() {
Preconditions.checkState(!childrenStatsResult.isEmpty());
// TODO the rowCount = child rowCount * rowCount of list column
- rowCount = childrenStatsResult.get(0).getRowCount();
+ rowCount = (long) childrenStatsResult.get(0).getRowCount();
return rowCount;
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
index 3267580c5f..f2a245a415 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java
@@ -24,6 +24,7 @@ import org.apache.doris.common.util.Util;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
@@ -53,13 +54,13 @@ public class TableStats {
public static final StatsType DATA_SIZE = StatsType.DATA_SIZE;
public static final StatsType ROW_COUNT = StatsType.ROW_COUNT;
- private static final Predicate<Long> DESIRED_ROW_COUNT_PRED = (v) -> v >= -1L;
+ private static final Predicate<Double> DESIRED_ROW_COUNT_PRED = (v) -> v >= -1L;
private static final Predicate<Long> DESIRED_DATA_SIZE_PRED = (v) -> v >= -1L;
- private long rowCount = -1;
+ private double rowCount = -1;
private long dataSize = -1;
private final Map<String, PartitionStats> nameToPartitionStats = Maps.newConcurrentMap();
- private final Map<String, ColumnStats> nameToColumnStats = Maps.newConcurrentMap();
+ private final Map<String, ColumnStat> nameToColumnStats = Maps.newConcurrentMap();
/**
* Return a default partition statistic.
@@ -71,13 +72,13 @@ public class TableStats {
public TableStats() {
}
- public TableStats(long rowCount, long dataSize) {
+ public TableStats(double rowCount, long dataSize) {
this.rowCount = rowCount;
this.dataSize = dataSize;
}
- public long getRowCount() {
- if (rowCount == -1) {
+ public double getRowCount() {
+ if (rowCount == -1) {
return nameToPartitionStats.values().stream()
.filter(partitionStats -> partitionStats.getRowCount() != -1)
.mapToLong(PartitionStats::getRowCount).sum();
@@ -85,6 +86,10 @@ public class TableStats {
return rowCount;
}
+ void setRowCount(double rowCount) {
+ this.rowCount = rowCount;
+ }
+
public long getDataSize() {
if (dataSize == -1) {
return nameToPartitionStats.values().stream()
@@ -98,7 +103,7 @@ public class TableStats {
return nameToPartitionStats;
}
- public Map<String, ColumnStats> getNameToColumnStats() {
+ public Map<String, ColumnStat> getNameToColumnStats() {
if (nameToColumnStats.isEmpty()) {
return getAggPartitionColStats();
}
@@ -117,28 +122,12 @@ public class TableStats {
PartitionStats.getDefaultPartitionStats());
}
- public ColumnStats getColumnStats(String columnName) {
- return nameToColumnStats.get(columnName);
- }
-
/**
* If the column statistics do not exist, the default statistics will be returned.
*/
- public ColumnStats getColumnStatsOrDefault(String columnName) {
+ public ColumnStat getColumnStatsOrDefault(String columnName) {
return nameToColumnStats.getOrDefault(columnName,
- ColumnStats.getDefaultColumnStats());
- }
-
- public List<String> getShowInfo() {
- List<String> result = Lists.newArrayList();
- result.add(Long.toString(getRowCount()));
- result.add(Long.toString(getDataSize()));
- return result;
- }
-
- public List<String> getShowInfo(String partitionName) {
- PartitionStats partitionStats = nameToPartitionStats.get(partitionName);
- return partitionStats.getShowInfo();
+ ColumnStat.getDefaultColumnStats());
}
/**
@@ -148,7 +137,7 @@ public class TableStats {
public void updateTableStats(Map<StatsType, String> statsTypeToValue) throws AnalysisException {
for (Map.Entry<StatsType, String> entry : statsTypeToValue.entrySet()) {
if (entry.getKey() == ROW_COUNT) {
- rowCount = Util.getLongPropertyOrDefault(entry.getValue(), rowCount,
+ rowCount = Util.getDoublePropertyOrDefault(entry.getValue(), rowCount,
DESIRED_ROW_COUNT_PRED, ROW_COUNT + " should >= -1");
} else if (entry.getKey() == DATA_SIZE) {
dataSize = Util.getLongPropertyOrDefault(entry.getValue(), dataSize,
@@ -173,24 +162,8 @@ public class TableStats {
*/
public void updateColumnStats(String columnName, Type columnType, Map<StatsType, String> statsTypeToValue)
throws AnalysisException {
- ColumnStats columnStats = getNotNullColumnStats(columnName);
- columnStats.updateStats(columnType, statsTypeToValue);
- }
-
- private Map<String, ColumnStats> getAggPartitionColStats() {
- Map<String, ColumnStats> aggColumnStats = Maps.newConcurrentMap();
- for (PartitionStats partitionStats : nameToPartitionStats.values()) {
- partitionStats.getNameToColumnStats().forEach((colName, columnStats) -> {
- if (!aggColumnStats.containsKey(colName)) {
- aggColumnStats.put(colName, columnStats);
- } else {
- ColumnStats oldColumnStats = aggColumnStats.get(colName);
- ColumnStats newColumnStats = ColumnStats.mergeColumnStats(columnStats, oldColumnStats);
- aggColumnStats.put(colName, newColumnStats);
- }
- });
- }
- return aggColumnStats;
+ ColumnStat columnStat = getColumnStats(columnName);
+ columnStat.updateStats(columnType, statsTypeToValue);
}
/**
@@ -214,12 +187,126 @@ public class TableStats {
* @param columnName column name
* @return @ColumnStats
*/
- private ColumnStats getNotNullColumnStats(String columnName) {
- ColumnStats columnStats = nameToColumnStats.get(columnName);
- if (columnStats == null) {
- columnStats = new ColumnStats();
- nameToColumnStats.put(columnName, columnStats);
+ private ColumnStat getNotNullColumnStats(String columnName) {
+ ColumnStat columnStat = nameToColumnStats.get(columnName);
+ if (columnStat == null) {
+ columnStat = new ColumnStat();
+ nameToColumnStats.put(columnName, columnStat);
}
- return columnStats;
+ return columnStat;
+ }
+
+ public ColumnStat getColumnStats(String columnName) {
+ ColumnStat columnStat = nameToColumnStats.get(columnName);
+ if (columnStat == null) {
+ columnStat = new ColumnStat();
+ nameToColumnStats.put(columnName, columnStat);
+ }
+ return columnStat;
+ }
+
+ public ColumnStat getColumnStatCopy(String columnName) {
+ ColumnStat columnStat = getColumnStats(columnName);
+ return columnStat.copy();
+ }
+
+ public List<String> getShowInfo() {
+ List<String> result = Lists.newArrayList();
+ result.add(Double.toString(getRowCount()));
+ result.add(Long.toString(getDataSize()));
+ return result;
+ }
+
+ public List<String> getShowInfo(String partitionName) {
+ PartitionStats partitionStats = nameToPartitionStats.get(partitionName);
+ return partitionStats.getShowInfo();
+ }
+
+ private Map<String, ColumnStat> getAggPartitionColStats() { // per-column merge of every partition's stats
+ Map<String, ColumnStat> aggColumnStats = new HashMap<>();
+ for (PartitionStats partitionStats : nameToPartitionStats.values()) {
+ partitionStats.getNameToColumnStats().forEach((colName, columnStats) -> {
+ if (!aggColumnStats.containsKey(colName)) {
+ aggColumnStats.put(colName, columnStats.copy()); // copy: the merge below mutates this entry
+ } else {
+ ColumnStat tblColStats = aggColumnStats.get(colName);
+ mergePartitionColumnStats(tblColStats, columnStats);
+ }
+ });
+ }
+
+ return aggColumnStats;
+ }
+
+ private void mergePartitionColumnStats(ColumnStat leftStats, ColumnStat rightStats) {
+ if (leftStats.getNdv() == -1) { // folds rightStats into leftStats in place; -1 is skipped as "no value"
+ if (rightStats.getNdv() != -1) {
+ leftStats.setNdv(rightStats.getNdv());
+ }
+ } else {
+ if (rightStats.getNdv() != -1) {
+ double ndv = leftStats.getNdv() + rightStats.getNdv();
+ leftStats.setNdv(ndv);
+ }
+ }
+
+ if (leftStats.getAvgSizeByte() == -1) {
+ if (rightStats.getAvgSizeByte() != -1) {
+ leftStats.setAvgSizeByte(rightStats.getAvgSizeByte());
+ }
+ } else {
+ if (rightStats.getAvgSizeByte() != -1) {
+ double avgSize = (leftStats.getAvgSizeByte() + rightStats.getAvgSizeByte()) / 2; // unweighted mean
+ leftStats.setAvgSizeByte(avgSize);
+ }
+ }
+
+ if (leftStats.getMaxSizeByte() == -1) {
+ if (rightStats.getMaxSizeByte() != -1) {
+ leftStats.setMaxSizeByte(rightStats.getMaxSizeByte());
+ }
+ } else {
+ if (rightStats.getMaxSizeByte() != -1) {
+ double maxSize = Math.max(leftStats.getMaxSizeByte(), rightStats.getMaxSizeByte());
+ leftStats.setMaxSizeByte(maxSize);
+ }
+ }
+
+ if (leftStats.getNumNulls() == -1) {
+ if (rightStats.getNumNulls() != -1) {
+ leftStats.setNumNulls(rightStats.getNumNulls());
+ }
+ } else {
+ if (rightStats.getNumNulls() != -1) {
+ double numNulls = leftStats.getNumNulls() + rightStats.getNumNulls();
+ leftStats.setNumNulls(numNulls);
+ }
+ }
+
+ if (Double.isNaN(leftStats.getMinValue())) { // NaN is skipped as "no value"
+ if (!Double.isNaN(rightStats.getMinValue())) {
+ leftStats.setMinValue(rightStats.getMinValue());
+ }
+ } else if (!Double.isNaN(rightStats.getMinValue())) {
+ double minValue = Math.min(leftStats.getMinValue(), rightStats.getMinValue());
+ leftStats.setMinValue(minValue);
+ }
+
+ // The merged range must cover both inputs: keep the smaller min above and the larger max below.
+ if (Double.isNaN(leftStats.getMaxValue())) {
+ if (!Double.isNaN(rightStats.getMaxValue())) {
+ leftStats.setMaxValue(rightStats.getMaxValue());
+ }
+ } else if (!Double.isNaN(rightStats.getMaxValue())) {
+ double maxValue = Math.max(leftStats.getMaxValue(), rightStats.getMaxValue());
+ leftStats.setMaxValue(maxValue);
+ }
+ }
+
+ /**
+ * This method is for unit test.
+ */
+ public void putColumnStats(String name, ColumnStat columnStat) {
+ nameToColumnStats.put(name, columnStat);
}
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/ColumnStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/ColumnStatTest.java
similarity index 98%
rename from fe/fe-core/src/test/java/org/apache/doris/catalog/ColumnStatsTest.java
rename to fe/fe-core/src/test/java/org/apache/doris/catalog/ColumnStatTest.java
index e5253395f3..f8d0bc77be 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/ColumnStatsTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/ColumnStatTest.java
@@ -26,7 +26,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-public class ColumnStatsTest {
+public class ColumnStatTest {
@Test
public void testSerialization() throws Exception {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java b/fe/fe-core/src/test/java/org/apache/doris/common/CheckedMathTest.java
similarity index 58%
copy from fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java
copy to fe/fe-core/src/test/java/org/apache/doris/common/CheckedMathTest.java
index 6288d1f6a9..a3e42038d2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/CheckedMathTest.java
@@ -15,24 +15,22 @@
// specific language governing permissions and limitations
// under the License.
-package org.apache.doris.statistics;
+package org.apache.doris.common;
-import com.google.common.base.Preconditions;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
-/**
- * Derive ExchangeNode statistics.
- */
-public class ExchangeStatsDerive extends BaseStatsDerive {
- @Override
- public StatsDeriveResult deriveStats() {
- return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv());
- }
+class CheckedMathTest {
- @Override
- protected long deriveRowCount() {
- Preconditions.checkState(!childrenStatsResult.isEmpty());
- rowCount = childrenStatsResult.get(0).getRowCount();
- capRowCountAtLimit();
- return rowCount;
+ @Test
+ void checkedMultiply() {
+ double a = 12.91;
+ double b = 21.44;
+ double res = CheckedMath.checkedMultiply(a, b);
+ Assertions.assertEquals(a * b, res, 0.01);
+ a = Double.MAX_VALUE;
+ b = 5;
+ res = CheckedMath.checkedMultiply(a, b);
+ Assertions.assertEquals(Double.MAX_VALUE, res, 0.01);
}
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java
index d4c8c8e47e..273e4496fc 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/cascades/DeriveStatsJobTest.java
@@ -17,7 +17,6 @@
package org.apache.doris.nereids.jobs.cascades;
-import org.apache.doris.analysis.NullLiteral;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.common.AnalysisException;
@@ -37,7 +36,7 @@ import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.util.MemoTestUtils;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.statistics.ColumnStats;
+import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsManager;
import org.apache.doris.statistics.StatsDeriveResult;
@@ -83,16 +82,17 @@ public class DeriveStatsJobTest {
}
private LogicalOlapScan constructOlapSCan() throws AnalysisException {
- ColumnStats columnStats1 = new ColumnStats(10, 0, 0, 5,
- new NullLiteral(), new NullLiteral());
+ ColumnStat columnStats1 = new ColumnStat(10, 0, 0, 5,
+ Double.NaN, Double.NaN);
new MockUp<TableStats>(TableStats.class) {
@Mock
- public ColumnStats getColumnStats(String columnName) {
+ public ColumnStat getColumnStats(String columnName) {
return columnStats1;
}
};
long tableId1 = 0;
+
Statistics statistics = new Statistics();
List<String> qualifier = ImmutableList.of("test", "t");
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java
new file mode 100644
index 0000000000..0f61a48309
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/ExpressionEstimationTest.java
@@ -0,0 +1,170 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.stats;
+
+import org.apache.doris.nereids.trees.expressions.Add;
+import org.apache.doris.nereids.trees.expressions.Divide;
+import org.apache.doris.nereids.trees.expressions.Multiply;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.Subtract;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Max;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.statistics.ColumnStat;
+import org.apache.doris.statistics.StatsDeriveResult;
+
+import org.apache.commons.math3.util.Precision;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+class ExpressionEstimationTest {
+
+ // MAX(a)
+ // a belongs to [0, 500]
+ @Test
+ public void test1() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ Max max = new Max(a);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, 0, 500));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ ColumnStat estimated = ExpressionEstimation.estimate(max, stat);
+ Assertions.assertEquals(500, estimated.getMinValue());
+ Assertions.assertEquals(1, estimated.getNdv());
+ }
+
+ // MIN(a)
+ // a belongs to [0, 500]
+ @Test
+ public void test2() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, 0, 500));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ Min min = new Min(a);
+ ColumnStat estimated = ExpressionEstimation.estimate(min, stat);
+ Assertions.assertEquals(0, estimated.getMaxValue());
+ Assertions.assertEquals(1, estimated.getNdv());
+ }
+
+ // a + b
+ // a belongs to [0, 500]
+ // b belongs to [300, 1000]
+ @Test
+ public void test3() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, 0, 500));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 0, 300, 1000));
+ Add add = new Add(a, b);
+ ColumnStat estimated = ExpressionEstimation.estimate(add, stat);
+ Assertions.assertEquals(300, estimated.getMinValue());
+ Assertions.assertEquals(1500, estimated.getMaxValue());
+ }
+
+ // a - b
+ // a belongs to [0, 500]
+ // b belongs to [300, 1000]
+ @Test
+ public void test4() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, 0, 500));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 0, 300, 1000));
+ Subtract subtract = new Subtract(a, b);
+ ColumnStat estimated = ExpressionEstimation.estimate(subtract, stat);
+ Assertions.assertEquals(-1000, estimated.getMinValue());
+ Assertions.assertEquals(200, estimated.getMaxValue());
+ }
+
+ // a * b
+ // a belongs to [-200, -100]
+ // b belongs to [-300, 1000]
+ @Test
+ public void test5() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, -200, -100));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 0, -300, 1000));
+ Multiply multiply = new Multiply(a, b);
+ ColumnStat estimated = ExpressionEstimation.estimate(multiply, stat);
+ Assertions.assertEquals(-200 * 1000, estimated.getMinValue());
+ Assertions.assertEquals(-200 * -300, estimated.getMaxValue());
+ }
+
+ // a * b
+ // a belongs to [-200, -100]
+ // b belongs to [-1000, -300]
+ @Test
+ public void test6() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, -200, -100));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 0, -1000, -300));
+ Multiply multiply = new Multiply(a, b);
+ ColumnStat estimated = ExpressionEstimation.estimate(multiply, stat);
+ Assertions.assertEquals(-100 * -300, estimated.getMinValue());
+ Assertions.assertEquals(-200 * -1000, estimated.getMaxValue());
+ }
+
+ // a / b
+ // a belongs to [-200, -100]
+ // b belongs to [-300, 1000]
+ @Test
+ public void test7() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, -200, -100));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 0, -300, 1000));
+ Divide divide = new Divide(a, b);
+ ColumnStat estimated = ExpressionEstimation.estimate(divide, stat);
+ Assertions.assertTrue(Precision.equals(-0.2, estimated.getMinValue(), 0.001));
+ Assertions.assertTrue(Precision.equals(0.666, estimated.getMaxValue(), 0.001));
+ }
+
+ // a / b
+ // a belongs to [-200, -100]
+ // b belongs to [-1000, -100]
+ @Test
+ public void test8() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, -200, -100));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 0, -1000, -100));
+ Divide divide = new Divide(a, b);
+ ColumnStat estimated = ExpressionEstimation.estimate(divide, stat);
+ Assertions.assertTrue(Precision.equals(0.1, estimated.getMinValue(), 0.001));
+ Assertions.assertEquals(2, estimated.getMaxValue());
+ }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
new file mode 100644
index 0000000000..11dee4e74c
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -0,0 +1,239 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.stats;
+
+import org.apache.doris.nereids.trees.expressions.And;
+import org.apache.doris.nereids.trees.expressions.EqualTo;
+import org.apache.doris.nereids.trees.expressions.GreaterThan;
+import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
+import org.apache.doris.nereids.trees.expressions.InPredicate;
+import org.apache.doris.nereids.trees.expressions.LessThan;
+import org.apache.doris.nereids.trees.expressions.LessThanEqual;
+import org.apache.doris.nereids.trees.expressions.Not;
+import org.apache.doris.nereids.trees.expressions.Or;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.statistics.ColumnStat;
+import org.apache.doris.statistics.StatsDeriveResult;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.math3.util.Precision;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+class FilterEstimationTest {
+
+ // a > 500 and b < 100 or a = c
+ @Test
+ public void test1() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ IntegerLiteral int500 = new IntegerLiteral(500);
+ GreaterThan greaterThan1 = new GreaterThan(a, int500);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ IntegerLiteral int100 = new IntegerLiteral(100);
+ LessThan lessThan = new LessThan(b, int100);
+ SlotReference c = new SlotReference("c", IntegerType.INSTANCE);
+ EqualTo equalTo = new EqualTo(a, c);
+ And and = new And(greaterThan1, lessThan);
+ Or or = new Or(and, equalTo);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 500, 0, 1000));
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 500, 0, 1000));
+ slotToColumnStat.put(c, new ColumnStat(500, 4, 4, 500, 0, 1000));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult expected = filterEstimation.estimate(or);
+ Assertions.assertTrue(
+ Precision.equals((0.5 * 0.1 + 0.1 - 0.5 * 0.1 * 0.1) * 1000, expected.getRowCount(), 0.01));
+ }
+
+ // a > 500 and b < 100 or a > c
+ @Test
+ public void test2() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ IntegerLiteral int500 = new IntegerLiteral(500);
+ GreaterThan greaterThan1 = new GreaterThan(a, int500);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ IntegerLiteral int100 = new IntegerLiteral(100);
+ LessThan lessThan = new LessThan(b, int100);
+ SlotReference c = new SlotReference("c", IntegerType.INSTANCE);
+ GreaterThan greaterThan = new GreaterThan(a, c);
+ And and = new And(greaterThan1, lessThan);
+ Or or = new Or(and, greaterThan);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 500, 0, 1000));
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 500, 0, 1000));
+ slotToColumnStat.put(c, new ColumnStat(500, 4, 4, 500, 0, 1000));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult expected = filterEstimation.estimate(or);
+ Assertions.assertTrue(
+ Precision.equals((0.5 * 0.1 + 0.1 / 0.3 - 0.5 * 0.1 * 0.1 / 0.3) * 1000, expected.getRowCount(), 0.01));
+ }
+
+ // a >= 500
+ // a belongs to [0, 500]
+ @Test
+ public void test3() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ IntegerLiteral int500 = new IntegerLiteral(500);
+ GreaterThanEqual ge = new GreaterThanEqual(a, int500);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 500, 0, 500));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult expected = filterEstimation.estimate(ge);
+ Assertions.assertEquals(1000 * 1.0 / 500, expected.getRowCount());
+ }
+
+ // a <= 500
+ // a belongs to [500, 1000]
+ @Test
+ public void test4() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ IntegerLiteral int500 = new IntegerLiteral(500);
+ LessThanEqual le = new LessThanEqual(a, int500);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 500, 500, 1000));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult expected = filterEstimation.estimate(le);
+ Assertions.assertEquals(1000 * 1.0 / 500, expected.getRowCount());
+ }
+
+ // a < 500
+ // a belongs to [500, 1000]
+ @Test
+ public void test5() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ IntegerLiteral int500 = new IntegerLiteral(500);
+ LessThan less = new LessThan(a, int500);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 500, 500, 1000));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult expected = filterEstimation.estimate(less);
+ Assertions.assertEquals(0, expected.getRowCount());
+ }
+
+ // a > 1000
+ // a belongs to [500, 1000]
+ @Test
+ public void test6() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ IntegerLiteral int1000 = new IntegerLiteral(1000);
+ GreaterThan ge = new GreaterThan(a, int1000);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 500, 500, 1000));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult expected = filterEstimation.estimate(ge);
+ Assertions.assertEquals(0, expected.getRowCount());
+ }
+
+ // a > b
+ // a belongs to [0, 500]
+ // b belongs to [501, 1000]
+ @Test
+ public void test7() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ GreaterThan greaterThan = new GreaterThan(a, b);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, 0, 500));
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 0, 501, 1000));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult estimated = filterEstimation.estimate(greaterThan);
+ Assertions.assertEquals(0, estimated.getRowCount());
+ }
+
+ // a < b
+ // a belongs to [0, 500]
+ // b belongs to [501, 1000]
+ @Test
+ public void test8() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ LessThan less = new LessThan(a, b);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, 0, 500));
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 0, 501, 1000));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult estimated = filterEstimation.estimate(less);
+ Assertions.assertEquals(1000, estimated.getRowCount());
+ }
+
+ // a > b
+ // a belongs to [501, 1000]
+ // b belongs to [0, 500]
+ @Test
+ public void test9() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ SlotReference b = new SlotReference("b", IntegerType.INSTANCE);
+ GreaterThan ge = new GreaterThan(a, b);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(500, 4, 4, 0, 501, 1000));
+ slotToColumnStat.put(b, new ColumnStat(500, 4, 4, 0, 0, 500));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult estimated = filterEstimation.estimate(ge);
+ Assertions.assertEquals(1000, estimated.getRowCount());
+ }
+
+ // a in (1, 3, 5)
+ // a belongs to [1, 10]
+ @Test
+ public void test10() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ IntegerLiteral i1 = new IntegerLiteral(1);
+ IntegerLiteral i3 = new IntegerLiteral(3);
+ IntegerLiteral i5 = new IntegerLiteral(5);
+ InPredicate inPredicate = new InPredicate(a, Lists.newArrayList(i1, i3, i5));
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(10, 4, 4, 0, 1, 10));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult estimated = filterEstimation.estimate(inPredicate);
+ Assertions.assertEquals(1000 * 3.0 / 10.0, estimated.getRowCount());
+ }
+
+ // a not in (1, 3, 5)
+ // a belongs to [1, 10]
+ @Test
+ public void test11() {
+ SlotReference a = new SlotReference("a", IntegerType.INSTANCE);
+ IntegerLiteral i1 = new IntegerLiteral(1);
+ IntegerLiteral i3 = new IntegerLiteral(3);
+ IntegerLiteral i5 = new IntegerLiteral(5);
+ InPredicate inPredicate = new InPredicate(a, Lists.newArrayList(i1, i3, i5));
+ Not not = new Not(inPredicate);
+ Map<Slot, ColumnStat> slotToColumnStat = new HashMap<>();
+ slotToColumnStat.put(a, new ColumnStat(10, 4, 4, 0, 1, 10));
+ StatsDeriveResult stat = new StatsDeriveResult(1000, slotToColumnStat);
+ FilterEstimation filterEstimation = new FilterEstimation(stat);
+ StatsDeriveResult estimated = filterEstimation.estimate(not);
+ Assertions.assertEquals(1000 * 7.0 / 10.0, estimated.getRowCount());
+ }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
index 1767a0940f..8647886853 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
@@ -17,10 +17,8 @@
package org.apache.doris.nereids.stats;
-import org.apache.doris.analysis.NullLiteral;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.common.AnalysisException;
import org.apache.doris.nereids.memo.Group;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.properties.LogicalProperties;
@@ -39,7 +37,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.qe.ConnectContext;
-import org.apache.doris.statistics.ColumnStats;
+import org.apache.doris.statistics.ColumnStat;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsManager;
import org.apache.doris.statistics.StatsDeriveResult;
@@ -48,8 +46,6 @@ import org.apache.doris.statistics.TableStats;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import mockit.Expectations;
-import mockit.Mock;
-import mockit.MockUp;
import mockit.Mocked;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -116,13 +112,16 @@ public class StatsCalculatorTest {
qualifier.add("t");
SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier);
SlotReference slot2 = new SlotReference("c2", IntegerType.INSTANCE, true, qualifier);
- ColumnStats columnStats1 = new ColumnStats(10, 0, 0, 5,
- new NullLiteral(), new NullLiteral());
- ColumnStats columnStats2 = new ColumnStats(20, 0, 0, 10,
- new NullLiteral(), new NullLiteral());
- Map<Slot, ColumnStats> slotColumnStatsMap = new HashMap<>();
- slotColumnStatsMap.put(slot1, columnStats1);
- slotColumnStatsMap.put(slot2, columnStats2);
+
+ ColumnStat columnStat1 = new ColumnStat();
+ columnStat1.setNdv(10);
+ columnStat1.setNumNulls(5);
+ ColumnStat columnStat2 = new ColumnStat();
+ columnStat2.setNdv(20);
+ columnStat2.setNumNulls(10); // was set on columnStat1, overwriting its null count of 5
+ Map<Slot, ColumnStat> slotColumnStatsMap = new HashMap<>();
+ slotColumnStatsMap.put(slot1, columnStat1);
+ slotColumnStatsMap.put(slot2, columnStat2);
StatsDeriveResult childStats = new StatsDeriveResult(10000, slotColumnStatsMap);
EqualTo eq1 = new EqualTo(slot1, new IntegerLiteral(1));
@@ -198,17 +197,13 @@ public class StatsCalculatorTest {
// }
@Test
- public void testOlapScan() throws AnalysisException {
- ColumnStats columnStats1 = new ColumnStats(10, 0, 0, 5,
- new NullLiteral(), new NullLiteral());
- new MockUp<TableStats>(TableStats.class) {
- @Mock
- public ColumnStats getColumnStats(String columnName) {
- return columnStats1;
- }
- };
-
+ public void testOlapScan() {
+ ColumnStat columnStat1 = new ColumnStat();
+ columnStat1.setNdv(10);
+ columnStat1.setNumNulls(5);
long tableId1 = 0;
+ TableStats tableStats1 = new TableStats();
+ tableStats1.putColumnStats("c1", columnStat1);
Statistics statistics = new Statistics();
List<String> qualifier = ImmutableList.of("test", "t");
@@ -243,10 +238,11 @@ public class StatsCalculatorTest {
qualifier.add("test");
qualifier.add("t");
SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier);
- ColumnStats columnStats1 = new ColumnStats(10, 1, 1, 5,
- new NullLiteral(), new NullLiteral());
- Map<Slot, ColumnStats> slotColumnStatsMap = new HashMap<>();
- slotColumnStatsMap.put(slot1, columnStats1);
+ ColumnStat columnStat1 = new ColumnStat();
+ columnStat1.setNdv(10);
+ columnStat1.setNumNulls(5);
+ Map<Slot, ColumnStat> slotColumnStatsMap = new HashMap<>();
+ slotColumnStatsMap.put(slot1, columnStat1);
StatsDeriveResult childStats = new StatsDeriveResult(10, slotColumnStatsMap);
Group childGroup = new Group();
@@ -262,7 +258,7 @@ public class StatsCalculatorTest {
StatsCalculator.estimate(groupExpression);
StatsDeriveResult limitStats = ownerGroup.getStatistics();
Assertions.assertEquals(1, limitStats.getRowCount());
- ColumnStats slot1Stats = limitStats.getSlotToColumnStats().get(slot1);
+ ColumnStat slot1Stats = limitStats.getSlotToColumnStats().get(slot1);
Assertions.assertEquals(1, slot1Stats.getNdv());
Assertions.assertEquals(1, slot1Stats.getNumNulls());
}
@@ -273,10 +269,11 @@ public class StatsCalculatorTest {
qualifier.add("test");
qualifier.add("t");
SlotReference slot1 = new SlotReference("c1", IntegerType.INSTANCE, true, qualifier);
- ColumnStats columnStats1 = new ColumnStats(10, 0, 0, 5,
- new NullLiteral(), new NullLiteral());
- Map<Slot, ColumnStats> slotColumnStatsMap = new HashMap<>();
- slotColumnStatsMap.put(slot1, columnStats1);
+ ColumnStat columnStat1 = new ColumnStat();
+ columnStat1.setNdv(10);
+ columnStat1.setNumNulls(5);
+ Map<Slot, ColumnStat> slotColumnStatsMap = new HashMap<>();
+ slotColumnStatsMap.put(slot1, columnStat1);
StatsDeriveResult childStats = new StatsDeriveResult(10, slotColumnStatsMap);
Group childGroup = new Group();
@@ -292,7 +289,7 @@ public class StatsCalculatorTest {
StatsCalculator.estimate(groupExpression);
StatsDeriveResult topNStats = ownerGroup.getStatistics();
Assertions.assertEquals(1, topNStats.getRowCount());
- ColumnStats slot1Stats = topNStats.getSlotToColumnStats().get(slot1);
+ ColumnStat slot1Stats = topNStats.getSlotToColumnStats().get(slot1);
Assertions.assertEquals(1, slot1Stats.getNdv());
Assertions.assertEquals(1, slot1Stats.getNumNulls());
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java
index c6ec604a60..43c2fc487f 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/ColumnStatsTest.java
@@ -17,8 +17,6 @@
package org.apache.doris.statistics;
-import org.apache.doris.analysis.LiteralExpr;
-import org.apache.doris.analysis.NullLiteral;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
@@ -30,14 +28,13 @@ import org.junit.Test;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
public class ColumnStatsTest {
- private ColumnStats columnStatsUnderTest;
+ private ColumnStat columnStatsUnderTest;
@Before
public void setUp() throws Exception {
- columnStatsUnderTest = new ColumnStats();
+ columnStatsUnderTest = new ColumnStat();
}
@Test
@@ -53,14 +50,14 @@ public class ColumnStatsTest {
columnStatsUnderTest.updateStats(columnType, statsTypeToValue);
// Verify the results
- long maxSize = columnStatsUnderTest.getMaxSize();
- Assert.assertEquals(8, maxSize);
+ double maxSize = columnStatsUnderTest.getMaxSizeByte();
+ Assert.assertEquals(8, maxSize, 0.1);
- long minValue = columnStatsUnderTest.getMinValue().getLongValue();
- Assert.assertEquals(0, minValue);
+ double minValue = columnStatsUnderTest.getMinValue();
+ Assert.assertEquals(0, minValue, 0.1);
- long maxValue = columnStatsUnderTest.getMaxValue().getLongValue();
- Assert.assertEquals(100, maxValue);
+ double maxValue = columnStatsUnderTest.getMaxValue();
+ Assert.assertEquals(100, maxValue, 0.1);
}
@Test
@@ -88,7 +85,7 @@ public class ColumnStatsTest {
statsTypeToValue.put(StatsType.MAX_VALUE, "1000");
columnStatsUnderTest.updateStats(columnType, statsTypeToValue);
- String[] expectedInfo = {"1", "8.0", "8", "2", "0", "1000"};
+ String[] expectedInfo = {"1.0", "8.0", "8.0", "2.0", "0", "1000"};
// Run the test
List<String> showInfo = columnStatsUnderTest.getShowInfo();
@@ -101,59 +98,57 @@ public class ColumnStatsTest {
@Test
public void testGetDefaultColumnStats() {
// Run the test
- ColumnStats defaultColumnStats = ColumnStats.getDefaultColumnStats();
+ ColumnStat defaultColumnStats = ColumnStat.getDefaultColumnStats();
// Verify the results
- long ndv = defaultColumnStats.getNdv();
- Assert.assertEquals(-1L, ndv);
+ double ndv = defaultColumnStats.getNdv();
+ Assert.assertEquals(-1L, ndv, 0.1);
- float avgSize = defaultColumnStats.getAvgSize();
+ double avgSize = defaultColumnStats.getAvgSizeByte();
Assert.assertEquals(-1.0f, avgSize, 0.0001);
- long maxSize = defaultColumnStats.getMaxSize();
- Assert.assertEquals(-1L, maxSize);
+ double maxSize = defaultColumnStats.getMaxSizeByte();
+ Assert.assertEquals(-1L, maxSize, 0.1);
- LiteralExpr maxValue = defaultColumnStats.getMaxValue();
- Assert.assertEquals(new NullLiteral(), maxValue);
+ double maxValue = defaultColumnStats.getMaxValue();
+ Assert.assertEquals(Double.NaN, maxValue, 0.1);
- LiteralExpr minValue = defaultColumnStats.getMinValue();
- Assert.assertEquals(new NullLiteral(), minValue);
+ double minValue = defaultColumnStats.getMinValue();
+ Assert.assertEquals(Double.NaN, minValue, 0.1);
}
@Test
public void testAggColumnStats() throws Exception {
// Setup
- ColumnStats columnStats = ColumnStats.getDefaultColumnStats();
- Type minValueType = Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.STRING));
- Type maxValueType = Objects.requireNonNull(Type.fromPrimitiveType(PrimitiveType.STRING));
- ColumnStats other = new ColumnStats(1L, 4.0f, 5L, 10L,
- LiteralExpr.create("sMinValue", minValueType),
- LiteralExpr.create("sMaxValue", maxValueType));
+ ColumnStat columnStats = ColumnStat.getDefaultColumnStats();
+ ColumnStat other = new ColumnStat(1L, 4.0f, 5L, 10L,
+ Double.NaN,
+ Double.NaN);
// Run the test
- ColumnStats aggColumnStats = ColumnStats.mergeColumnStats(columnStats, other);
+ ColumnStat aggColumnStats = ColumnStat.mergeColumnStats(columnStats, other);
// Verify the results
- long ndv = aggColumnStats.getNdv();
+ double ndv = aggColumnStats.getNdv();
// 0(default) + 1
- Assert.assertEquals(1L, ndv);
+ Assert.assertEquals(1L, ndv, 0.1);
- float avgSize = aggColumnStats.getAvgSize();
+ double avgSize = aggColumnStats.getAvgSizeByte();
// (0.0f + 4.0f) / 2
Assert.assertEquals(4.0f, avgSize, 0.0001);
- long maxSize = aggColumnStats.getMaxSize();
- Assert.assertEquals(5L, maxSize);
+ double maxSize = aggColumnStats.getMaxSizeByte();
+ Assert.assertEquals(5L, maxSize, 0.1);
- long numNulls = aggColumnStats.getNumNulls();
- Assert.assertEquals(10L, numNulls);
+ double numNulls = aggColumnStats.getNumNulls();
+ Assert.assertEquals(10L, numNulls, 0.1);
- String minValue = aggColumnStats.getMinValue().getStringValue();
+ double minValue = aggColumnStats.getMinValue();
// NaN (default) vs NaN (other): merged min value stays NaN
- Assert.assertEquals("NULL", minValue);
+ Assert.assertEquals(Double.NaN, minValue, 0.1);
- String maxValue = aggColumnStats.getMaxValue().getStringValue();
+ double maxValue = aggColumnStats.getMaxValue();
// NaN (default) vs NaN (other): merged max value stays NaN
- Assert.assertEquals("sMaxValue", maxValue);
+ Assert.assertEquals(Double.NaN, maxValue, 0.1);
}
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java
index fe9bb67bce..522877bc0a 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/PartitionStatsTest.java
@@ -81,26 +81,26 @@ public class PartitionStatsTest {
// Run the test
partitionStatsUnderTest.updateColumnStats("columnName", columnType, statsTypeToValue);
- ColumnStats columnStats = partitionStatsUnderTest.getColumnStats("columnName");
+ ColumnStat columnStats = partitionStatsUnderTest.getColumnStats("columnName");
// Verify the results
- long ndv = columnStats.getNdv();
- Assert.assertEquals(1, ndv);
+ double ndv = columnStats.getNdv();
+ Assert.assertEquals(1, ndv, 0.1);
- float avgSize = columnStats.getAvgSize();
+ double avgSize = columnStats.getAvgSizeByte();
Assert.assertEquals(8.0f, avgSize, 0.0001);
- long maxSize = columnStats.getMaxSize();
- Assert.assertEquals(8, maxSize);
+ double maxSize = columnStats.getMaxSizeByte();
+ Assert.assertEquals(8, maxSize, 0.1);
- long maxValue = columnStats.getMaxValue().getLongValue();
- Assert.assertEquals(1000, maxValue);
+ double maxValue = columnStats.getMaxValue();
+ Assert.assertEquals(1000, maxValue, 0.1);
- long minValue = columnStats.getMinValue().getLongValue();
- Assert.assertEquals(0, minValue);
+ double minValue = columnStats.getMinValue();
+ Assert.assertEquals(0, minValue, 0.1);
- long numNulls = columnStats.getNumNulls();
- Assert.assertEquals(2, numNulls);
+ double numNulls = columnStats.getNumNulls();
+ Assert.assertEquals(2, numNulls, 0.1);
}
@Test
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java
index 640cb0a1e3..9c10faa828 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsManagerTest.java
@@ -92,8 +92,8 @@ public class StatisticsManagerTest {
TableStats tableStats = statistics.getTableStats(0L);
// Verify the results
- long rowCount = tableStats.getRowCount();
- Assert.assertEquals(1000L, rowCount);
+ double rowCount = tableStats.getRowCount();
+ Assert.assertEquals(1000L, rowCount, 0.1);
long dataSize = tableStats.getDataSize();
Assert.assertEquals(10240L, dataSize);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java
index 4a45f301bd..843606dd37 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsTest.java
@@ -44,7 +44,7 @@ public class StatisticsTest {
// Run the test
statisticsUnderTest.updateTableStats(0L, statsTypeToValue);
- long rowCount = statisticsUnderTest.getTableStats(0L).getRowCount();
+ long rowCount = (long) statisticsUnderTest.getTableStats(0L).getRowCount();
// Verify the results
Assert.assertEquals(1000L, rowCount);
@@ -97,8 +97,8 @@ public class StatisticsTest {
// Run the test
statisticsUnderTest.updateColumnStats(0L, "columnName", columnType, statsTypeToValue);
- Map<String, ColumnStats> columnStats = statisticsUnderTest.getColumnStats(0L);
- long numNulls = columnStats.get("columnName").getNumNulls();
+ Map<String, ColumnStat> columnStats = statisticsUnderTest.getColumnStats(0L);
+ long numNulls = (long) columnStats.get("columnName").getNumNulls();
// Verify the results
Assert.assertEquals(1000L, numNulls);
@@ -126,9 +126,9 @@ public class StatisticsTest {
// Run the test
statisticsUnderTest.updateColumnStats(0L, "partitionName",
"columnName", columnType, statsTypeToValue);
- Map<String, ColumnStats> columnStats = statisticsUnderTest
+ Map<String, ColumnStat> columnStats = statisticsUnderTest
.getColumnStats(0L, "partitionName");
- long numNulls = columnStats.get("columnName").getNumNulls();
+ long numNulls = (long) columnStats.get("columnName").getNumNulls();
// Verify the results
Assert.assertEquals(1000L, numNulls);
@@ -157,8 +157,8 @@ public class StatisticsTest {
TableStats result = statisticsUnderTest.getTableStats(0L);
// Verify the results
- long rowCount = result.getRowCount();
- Assert.assertEquals(1000, rowCount);
+ double rowCount = result.getRowCount();
+ Assert.assertEquals(1000, rowCount, 0.1);
}
@Test
@@ -224,12 +224,12 @@ public class StatisticsTest {
statisticsUnderTest.updateColumnStats(0L, "columnName", columnType, statsTypeToValue);
// Run the test
- Map<String, ColumnStats> result = statisticsUnderTest.getColumnStats(0L);
+ Map<String, ColumnStat> result = statisticsUnderTest.getColumnStats(0L);
// Verify the results
- ColumnStats columnStats = result.get("columnName");
- long numNulls = columnStats.getNumNulls();
- Assert.assertEquals(1000, numNulls);
+ ColumnStat columnStats = result.get("columnName");
+ double numNulls = columnStats.getNumNulls();
+ Assert.assertEquals(1000, numNulls, 0.1);
}
@Test
@@ -249,13 +249,13 @@ public class StatisticsTest {
"columnName", columnType, statsTypeToValue);
// Run the test
- Map<String, ColumnStats> result = statisticsUnderTest
+ Map<String, ColumnStat> result = statisticsUnderTest
.getColumnStats(0L, "partitionName");
// Verify the results
- ColumnStats columnStats = result.get("columnName");
- long numNulls = columnStats.getNumNulls();
- Assert.assertEquals(1000, numNulls);
+ ColumnStat columnStats = result.get("columnName");
+ double numNulls = columnStats.getNumNulls();
+ Assert.assertEquals(1000, numNulls, 0.1);
}
@Test
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java
index 8106a9ead6..9c6ccdd380 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsTest.java
@@ -48,8 +48,8 @@ public class TableStatsTest {
tableStatsUnderTest.updateTableStats(statsTypeToValue);
// Verify the results
- long rowCount = tableStatsUnderTest.getRowCount();
- Assert.assertEquals(1000, rowCount);
+ double rowCount = tableStatsUnderTest.getRowCount();
+ Assert.assertEquals(1000, rowCount, 0.01);
long dataSize = tableStatsUnderTest.getDataSize();
Assert.assertEquals(10240, dataSize);
@@ -111,26 +111,26 @@ public class TableStatsTest {
// Run the test
tableStatsUnderTest.updateColumnStats("columnName", columnType, statsTypeToValue);
- ColumnStats columnStats = tableStatsUnderTest.getColumnStats("columnName");
+ ColumnStat columnStats = tableStatsUnderTest.getColumnStats("columnName");
// Verify the results
- long ndv = columnStats.getNdv();
- Assert.assertEquals(1L, ndv);
+ double ndv = columnStats.getNdv();
+ Assert.assertEquals(1L, ndv, 0.01);
- float avgSize = columnStats.getAvgSize();
+ double avgSize = columnStats.getAvgSizeByte();
Assert.assertEquals(8.0f, avgSize, 0.0001);
- long maxSize = columnStats.getMaxSize();
- Assert.assertEquals(8L, maxSize);
+ double maxSize = columnStats.getMaxSizeByte();
+ Assert.assertEquals(8L, maxSize, 0.01);
- long maxValue = columnStats.getMaxValue().getLongValue();
- Assert.assertEquals(1000, maxValue);
+ double maxValue = columnStats.getMaxValue();
+ Assert.assertEquals(1000, maxValue, 0.01);
- long minValue = columnStats.getMinValue().getLongValue();
- Assert.assertEquals(0L, minValue);
+ double minValue = columnStats.getMinValue();
+ Assert.assertEquals(0L, minValue, 0.01);
- long numNulls = columnStats.getNumNulls();
- Assert.assertEquals(2, numNulls);
+ double numNulls = columnStats.getNumNulls();
+ Assert.assertEquals(2, numNulls, 0.01);
}
@Test
@@ -152,7 +152,7 @@ public class TableStatsTest {
statsTypeToValue.put(StatsType.DATA_SIZE, "10240");
tableStatsUnderTest.updateTableStats(statsTypeToValue);
- String[] expectedInfo = {"1000", "10240"};
+ String[] expectedInfo = {"1000.0", "10240"};
// Run the test
List<String> showInfo = tableStatsUnderTest.getShowInfo();
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org