You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/06 16:55:49 UTC
[doris] branch master updated: [enhancement](histogram) add histogram syntax and persist histogram statistics (#15490)
This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5dfdacd278 [enhancement](histogram) add histogram syntax and persist histogram statistics (#15490)
5dfdacd278 is described below
commit 5dfdacd278de0025baa51e1f52c31be65701f78a
Author: ElvinWei <zh...@outlook.com>
AuthorDate: Sat Jan 7 00:55:42 2023 +0800
[enhancement](histogram) add histogram syntax and persist histogram statistics (#15490)
Histogram statistics are more expensive to collect, so we collect and persist them separately from column statistics.
This PR does the following work:
1. Add histogram syntax and add keyword `TABLE`
2. Add the task of collecting histogram statistics
3. Persist histogram statistics
4. Replace fastjson with gson
5. Add unit tests...
Relevant syntax examples:
> Following other databases such as MySQL, the keyword `TABLE` is added to the `ANALYZE` statement.
```SQL
-- collect column statistics
ANALYZE TABLE statistics_test;
-- collect histogram statistics
ANALYZE TABLE statistics_test UPDATE HISTOGRAM ON col1,col2;
```
Based on #15317
---
fe/fe-core/src/main/cup/sql_parser.cup | 9 +-
.../doris/analysis/AlterColumnStatsStmt.java | 1 -
.../org/apache/doris/analysis/AnalyzeStmt.java | 14 +-
.../java/org/apache/doris/catalog/FunctionSet.java | 2 +
.../doris/catalog/InternalSchemaInitializer.java | 35 +++-
.../java/org/apache/doris/catalog/OlapTable.java | 4 +
.../doris/nereids/stats/StatsCalculator.java | 3 +-
.../trees/expressions/functions/table/Numbers.java | 2 +-
.../apache/doris/statistics/AnalysisManager.java | 7 +-
.../apache/doris/statistics/AnalysisTaskInfo.java | 7 +-
.../apache/doris/statistics/BaseAnalysisTask.java | 7 +-
.../java/org/apache/doris/statistics/Bucket.java | 25 +++
.../apache/doris/statistics/ColumnStatistic.java | 42 +----
.../doris/statistics/ColumnStatisticBuilder.java | 13 +-
.../org/apache/doris/statistics/Histogram.java | 187 ++++++++++-----------
.../apache/doris/statistics/HistogramBuilder.java | 100 +++++++++++
.../org/apache/doris/statistics/HistogramTask.java | 90 ++++++++++
.../apache/doris/statistics/HiveAnalysisTask.java | 4 +-
.../doris/statistics/IcebergAnalysisTask.java | 2 +-
.../statistics/{Bucket.java => Statistic.java} | 55 +++---
.../doris/statistics/StatisticConstants.java | 2 +
.../apache/doris/statistics/StatisticsCache.java | 36 +++-
.../doris/statistics/StatisticsCacheLoader.java | 40 ++++-
.../doris/statistics/StatisticsRepository.java | 20 ++-
.../doris/statistics/util/StatisticsUtil.java | 30 ++++
fe/fe-core/src/main/jflex/sql_scanner.flex | 1 +
.../doris/nereids/util/HyperGraphBuilder.java | 2 +-
.../apache/doris/statistics/AnalysisJobTest.java | 2 +-
.../org/apache/doris/statistics/CacheTest.java | 78 ++++++++-
.../org/apache/doris/statistics/HistogramTest.java | 16 +-
.../apache/doris/statistics/MVStatisticsTest.java | 2 +-
.../doris/statistics/StatsDeriveResultTest.java | 2 +-
.../suites/statistics/alter_col_stats.groovy | 3 +-
33 files changed, 603 insertions(+), 240 deletions(-)
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup
index 77260ba95f..f1acb245ea 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -615,7 +615,8 @@ terminal String
KW_WRITE,
KW_YEAR,
KW_MTMV,
- KW_TYPECAST;
+ KW_TYPECAST,
+ KW_HISTOGRAM;
terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT;
terminal BITAND, BITOR, BITXOR, BITNOT;
@@ -2648,10 +2649,14 @@ show_create_routine_load_stmt ::=
// analyze statment
analyze_stmt ::=
- KW_ANALYZE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties
+ KW_ANALYZE KW_TABLE table_name:tbl opt_col_list:cols opt_partition_names:partitionNames opt_properties:properties
{:
RESULT = new AnalyzeStmt(tbl, cols, partitionNames, properties);
:}
+ | KW_ANALYZE KW_TABLE table_name:tbl KW_UPDATE KW_HISTOGRAM KW_ON ident_list:cols opt_properties:properties
+ {:
+ RESULT = new AnalyzeStmt(tbl, cols, properties);
+ :}
;
// Grant statement
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
index 1af3dffff9..440a6acca9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterColumnStatsStmt.java
@@ -62,7 +62,6 @@ public class AlterColumnStatsStmt extends DdlStmt {
.add(ColumnStatistic.NUM_NULLS)
.add(ColumnStatistic.MIN_VALUE)
.add(ColumnStatistic.MAX_VALUE)
- .add(ColumnStatistic.HISTOGRAM)
.add(StatsType.DATA_SIZE)
.build();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
index ecf34cd23c..d80dda6d43 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
@@ -64,6 +64,8 @@ public class AnalyzeStmt extends DdlStmt {
// time to wait for collect statistics
public static final String CBO_STATISTICS_TASK_TIMEOUT_SEC = "cbo_statistics_task_timeout_sec";
+ public boolean isHistogram = false;
+
private static final ImmutableSet<String> PROPERTIES_SET = new ImmutableSet.Builder<String>()
.add(CBO_STATISTICS_TASK_TIMEOUT_SEC)
.build();
@@ -76,7 +78,7 @@ public class AnalyzeStmt extends DdlStmt {
private TableIf table;
- private final PartitionNames optPartitionNames;
+ private PartitionNames optPartitionNames;
private List<String> optColumnNames;
private Map<String, String> optProperties;
@@ -85,6 +87,16 @@ public class AnalyzeStmt extends DdlStmt {
private final List<String> partitionNames = Lists.newArrayList();
+ public AnalyzeStmt(TableName tableName,
+ List<String> optColumnNames,
+ Map<String, String> optProperties) {
+ this.tableName = tableName;
+ this.optColumnNames = optColumnNames;
+ wholeTbl = CollectionUtils.isEmpty(optColumnNames);
+ isHistogram = true;
+ this.optProperties = optProperties;
+ }
+
public AnalyzeStmt(TableName tableName,
List<String> optColumnNames,
PartitionNames optPartitionNames,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 1b437e3e92..d25395177a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -2612,6 +2612,8 @@ public class FunctionSet<T> {
"", "", "", "", "", true, false, true, true));
addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t,
"", "", "", "", "", true, false, true, true));
+ addBuiltin(AggregateFunction.createBuiltin(HISTOGRAM, Lists.newArrayList(t, Type.DOUBLE, Type.INT), Type.VARCHAR, t,
+ "", "", "", "", "", true, false, true, true));
}
// Avg
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java
index eccadac0f6..45061e5287 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java
@@ -79,6 +79,7 @@ public class InternalSchemaInitializer extends Thread {
private void createTbl() throws UserException {
Env.getCurrentEnv().getInternalCatalog().createTable(buildStatisticsTblStmt());
+ Env.getCurrentEnv().getInternalCatalog().createTable(buildHistogramTblStmt());
Env.getCurrentEnv().getInternalCatalog().createTable(buildAnalysisJobTblStmt());
}
@@ -115,7 +116,6 @@ public class InternalSchemaInitializer extends Thread {
columnDefs.add(new ColumnDef("null_count", TypeDef.create(PrimitiveType.BIGINT)));
columnDefs.add(new ColumnDef("min", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH)));
columnDefs.add(new ColumnDef("max", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH)));
- columnDefs.add(new ColumnDef("histogram", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH)));
columnDefs.add(new ColumnDef("data_size_in_bytes", TypeDef.create(PrimitiveType.BIGINT)));
columnDefs.add(new ColumnDef("update_time", TypeDef.create(PrimitiveType.DATETIME)));
String engineName = "olap";
@@ -138,6 +138,39 @@ public class InternalSchemaInitializer extends Thread {
return createTableStmt;
}
+ @VisibleForTesting
+ public CreateTableStmt buildHistogramTblStmt() throws UserException {
+ TableName tableName = new TableName("",
+ FeConstants.INTERNAL_DB_NAME, StatisticConstants.HISTOGRAM_TBL_NAME);
+ List<ColumnDef> columnDefs = new ArrayList<>();
+ columnDefs.add(new ColumnDef("id", TypeDef.createVarchar(StatisticConstants.ID_LEN)));
+ columnDefs.add(new ColumnDef("catalog_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
+ columnDefs.add(new ColumnDef("db_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
+ columnDefs.add(new ColumnDef("tbl_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
+ columnDefs.add(new ColumnDef("idx_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
+ columnDefs.add(new ColumnDef("col_id", TypeDef.createVarchar(StatisticConstants.MAX_NAME_LEN)));
+ columnDefs.add(new ColumnDef("sample_rate", TypeDef.create(PrimitiveType.DOUBLE)));
+ columnDefs.add(new ColumnDef("buckets", TypeDef.createVarchar(ScalarType.MAX_VARCHAR_LENGTH)));
+ columnDefs.add(new ColumnDef("update_time", TypeDef.create(PrimitiveType.DATETIME)));
+ String engineName = "olap";
+ KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS,
+ Lists.newArrayList("id"));
+ DistributionDesc distributionDesc = new HashDistributionDesc(
+ StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT,
+ Lists.newArrayList("id"));
+ Map<String, String> properties = new HashMap<String, String>() {
+ {
+ put("replication_num", String.valueOf(Config.statistic_internal_table_replica_num));
+ }
+ };
+ CreateTableStmt createTableStmt = new CreateTableStmt(true, false,
+ tableName, columnDefs, engineName, keysDesc, null, distributionDesc,
+ properties, null, "Doris internal statistics table, don't modify it", null);
+ StatisticsUtil.analyze(createTableStmt);
+ // createTableStmt.setClusterName(SystemInfoService.DEFAULT_CLUSTER);
+ return createTableStmt;
+ }
+
@VisibleForTesting
public CreateTableStmt buildAnalysisJobTblStmt() throws UserException {
TableName tableName = new TableName("",
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 2cde04593d..db628618d6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -51,6 +51,7 @@ import org.apache.doris.statistics.AnalysisTaskInfo;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
import org.apache.doris.statistics.AnalysisTaskScheduler;
import org.apache.doris.statistics.BaseAnalysisTask;
+import org.apache.doris.statistics.HistogramTask;
import org.apache.doris.statistics.MVAnalysisTask;
import org.apache.doris.statistics.OlapAnalysisTask;
import org.apache.doris.system.Backend;
@@ -1005,6 +1006,9 @@ public class OlapTable extends Table {
@Override
public BaseAnalysisTask createAnalysisTask(AnalysisTaskScheduler scheduler, AnalysisTaskInfo info) {
+ if (info.analysisType.equals(AnalysisType.HISTOGRAM)) {
+ return new HistogramTask(scheduler, info);
+ }
if (info.analysisType.equals(AnalysisType.COLUMN)) {
return new OlapAnalysisTask(scheduler, info);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 2b2bf83e6b..e6ae621ca8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -290,6 +290,7 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
return computeTopN(topN);
}
+ @Override
public StatsDeriveResult visitPhysicalLocalQuickSort(PhysicalLocalQuickSort<? extends Plan> sort, Void context) {
return groupExpression.childStatistics(0);
}
@@ -447,7 +448,6 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
stats.dataSize < 0 ? stats.dataSize : stats.dataSize * groupingSetNum,
stats.minValue,
stats.maxValue,
- stats.histogram,
stats.selectivity,
stats.minExpr,
stats.maxExpr,
@@ -538,7 +538,6 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void>
leftStats.dataSize + rightStats.dataSize,
Math.min(leftStats.minValue, rightStats.minValue),
Math.max(leftStats.maxValue, rightStats.maxValue),
- null,
1.0 / (leftStats.ndv + rightStats.ndv),
leftStats.minExpr,
leftStats.maxExpr,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java
index 0aa0fb93a7..fc94a3dc09 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java
@@ -71,7 +71,7 @@ public class Numbers extends TableValuedFunction {
Map<Id, ColumnStatistic> columnToStatistics = Maps.newHashMap();
ColumnStatistic columnStat = new ColumnStatistic(rowNum, rowNum, 8, 0, 8, 0, rowNum - 1,
- null, 1.0 / rowNum, new IntLiteral(0, Type.BIGINT), new IntLiteral(rowNum - 1, Type.BIGINT), false);
+ 1.0 / rowNum, new IntLiteral(0, Type.BIGINT), new IntLiteral(rowNum - 1, Type.BIGINT), false);
columnToStatistics.put(slots.get(0).getExprId(), columnStat);
return new StatsDeriveResult(rowNum, columnToStatistics);
} catch (Exception t) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index cbfd217158..28fdd4d450 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -43,6 +43,8 @@ import java.util.concurrent.ConcurrentMap;
public class AnalysisManager {
+ public final AnalysisTaskScheduler taskScheduler;
+
private static final Logger LOG = LogManager.getLogger(AnalysisManager.class);
private static final String UPDATE_JOB_STATE_SQL_TEMPLATE = "UPDATE "
@@ -51,8 +53,6 @@ public class AnalysisManager {
private final ConcurrentMap<Long, Map<Long, AnalysisTaskInfo>> analysisJobIdToTaskMap;
- public final AnalysisTaskScheduler taskScheduler;
-
private StatisticsCache statisticsCache;
private final AnalysisTaskExecutor taskExecutor;
@@ -76,10 +76,11 @@ public class AnalysisManager {
if (colNames != null) {
for (String colName : colNames) {
long taskId = Env.getCurrentEnv().getNextId();
+ AnalysisType analType = analyzeStmt.isHistogram ? AnalysisType.HISTOGRAM : AnalysisType.COLUMN;
AnalysisTaskInfo analysisTaskInfo = new AnalysisTaskInfoBuilder().setJobId(jobId)
.setTaskId(taskId).setCatalogName(catalogName).setDbName(db)
.setTblName(tbl.getTbl()).setColName(colName).setJobType(JobType.MANUAL)
- .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(AnalysisType.COLUMN)
+ .setAnalysisMethod(AnalysisMethod.FULL).setAnalysisType(analType)
.setState(AnalysisState.PENDING)
.setScheduleType(ScheduleType.ONCE).build();
try {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
index 6c2243b261..b0b1c370cb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java
@@ -34,7 +34,8 @@ public class AnalysisTaskInfo {
public enum AnalysisType {
COLUMN,
- INDEX
+ INDEX,
+ HISTOGRAM
}
public enum JobType {
@@ -69,6 +70,10 @@ public class AnalysisTaskInfo {
public final AnalysisType analysisType;
+ // TODO: define constants or get them from configuration properties
+ public final double sampleRate = 0.2;
+ public final int maxBucketNum = 128;
+
public String message;
// finished or failed
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 81c4601647..54793881ab 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -26,9 +26,13 @@ import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.statistics.AnalysisTaskInfo.AnalysisType;
import com.google.common.annotations.VisibleForTesting;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
public abstract class BaseAnalysisTask {
+ public static final Logger LOG = LogManager.getLogger(BaseAnalysisTask.class);
+
protected static final String INSERT_PART_STATISTICS = "INSERT INTO "
+ "${internalDB}.${columnStatTbl}"
+ " SELECT "
@@ -119,7 +123,8 @@ public abstract class BaseAnalysisTask {
info, AnalysisState.FAILED,
String.format("Table with name %s not exists", info.tblName), System.currentTimeMillis());
}
- if (info.analysisType != null && info.analysisType.equals(AnalysisType.COLUMN)) {
+ if (info.analysisType != null && (info.analysisType.equals(AnalysisType.COLUMN)
+ || info.analysisType.equals(AnalysisType.HISTOGRAM))) {
col = tbl.getColumn(info.colName);
if (col == null) {
Env.getCurrentEnv().getAnalysisManager().updateTaskStatus(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java
index adcbd27d73..645a9d68e1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java
@@ -18,6 +18,15 @@
package org.apache.doris.statistics;
import org.apache.doris.analysis.LiteralExpr;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.statistics.util.StatisticsUtil;
+
+import com.google.common.collect.Lists;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+
+import java.util.List;
public class Bucket {
public LiteralExpr lower;
@@ -65,4 +74,20 @@ public class Bucket {
public void setNdv(int ndv) {
this.ndv = ndv;
}
+
+ public static List<Bucket> deserializeFromjson(Type datatype, JsonArray jsonArray)
+ throws AnalysisException {
+ List<Bucket> buckets = Lists.newArrayList();
+ for (int i = 0; i < jsonArray.size(); i++) {
+ JsonObject bucketJson = jsonArray.get(i).getAsJsonObject();
+ Bucket bucket = new Bucket();
+ bucket.lower = StatisticsUtil.readableValue(datatype, bucketJson.get("lower").getAsString());
+ bucket.upper = StatisticsUtil.readableValue(datatype, bucketJson.get("upper").getAsString());
+ bucket.count = bucketJson.get("count").getAsInt();
+ bucket.preSum = bucketJson.get("pre_sum").getAsInt();
+ bucket.ndv = bucketJson.get("ndv").getAsInt();
+ buckets.add(bucket);
+ }
+ return buckets;
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index d128e66c50..3a47e368c8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -19,12 +19,7 @@ package org.apache.doris.statistics;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.catalog.Column;
-import org.apache.doris.catalog.DatabaseIf;
-import org.apache.doris.catalog.Env;
-import org.apache.doris.catalog.OlapTable;
-import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.Type;
-import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.statistics.util.StatisticsUtil;
@@ -42,13 +37,12 @@ public class ColumnStatistic {
public static final StatsType NUM_NULLS = StatsType.NUM_NULLS;
public static final StatsType MIN_VALUE = StatsType.MIN_VALUE;
public static final StatsType MAX_VALUE = StatsType.MAX_VALUE;
- public static final StatsType HISTOGRAM = StatsType.HISTOGRAM;
private static final Logger LOG = LogManager.getLogger(ColumnStatistic.class);
public static ColumnStatistic DEFAULT = new ColumnStatisticBuilder().setAvgSizeByte(1).setNdv(1)
.setNumNulls(1).setCount(1).setMaxValue(Double.MAX_VALUE).setMinValue(Double.MIN_VALUE)
- .setHistogram(Histogram.defaultHistogram()).setSelectivity(1.0).setIsUnknown(true)
+ .setSelectivity(1.0).setIsUnknown(true)
.build();
public static final Set<Type> MAX_MIN_UNSUPPORTED_TYPE = new HashSet<>();
@@ -68,7 +62,6 @@ public class ColumnStatistic {
public final double avgSizeByte;
public final double minValue;
public final double maxValue;
- public final Histogram histogram;
public final boolean isUnKnown;
/*
selectivity of Column T1.A:
@@ -90,8 +83,7 @@ public class ColumnStatistic {
public ColumnStatistic(double count, double ndv, double avgSizeByte,
double numNulls, double dataSize, double minValue, double maxValue,
- Histogram histogram, double selectivity, LiteralExpr minExpr,
- LiteralExpr maxExpr, boolean isUnKnown) {
+ double selectivity, LiteralExpr minExpr, LiteralExpr maxExpr, boolean isUnKnown) {
this.count = count;
this.ndv = ndv;
this.avgSizeByte = avgSizeByte;
@@ -99,7 +91,6 @@ public class ColumnStatistic {
this.dataSize = dataSize;
this.minValue = minValue;
this.maxValue = maxValue;
- this.histogram = histogram;
this.selectivity = selectivity;
this.minExpr = minExpr;
this.maxExpr = maxExpr;
@@ -127,7 +118,7 @@ public class ColumnStatistic {
long dbID = Long.parseLong(resultRow.getColumnValue("db_id"));
long tblId = Long.parseLong(resultRow.getColumnValue("tbl_id"));
String colName = resultRow.getColumnValue("col_id");
- Column col = findColumn(catalogId, dbID, tblId, idxId, colName);
+ Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName);
if (col == null) {
LOG.warn("Failed to deserialize column statistics, ctlId: {} dbId: {}"
+ "tblId: {} column: {} not exists",
@@ -136,10 +127,8 @@ public class ColumnStatistic {
}
String min = resultRow.getColumnValue("min");
String max = resultRow.getColumnValue("max");
- String histogram = resultRow.getColumnValue("histogram");
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
- columnStatisticBuilder.setHistogram(Histogram.deserializeFromJson(col.getType(), histogram));
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
columnStatisticBuilder.setSelectivity(1.0);
@@ -158,7 +147,7 @@ public class ColumnStatistic {
public ColumnStatistic copy() {
return new ColumnStatisticBuilder().setCount(count).setNdv(ndv).setAvgSizeByte(avgSizeByte)
.setNumNulls(numNulls).setDataSize(dataSize).setMinValue(minValue)
- .setMaxValue(maxValue).setHistogram(histogram).setMinExpr(minExpr).setMaxExpr(maxExpr)
+ .setMaxValue(maxValue).setMinExpr(minExpr).setMaxExpr(maxExpr)
.setSelectivity(selectivity).setIsUnknown(isUnKnown).build();
}
@@ -182,7 +171,6 @@ public class ColumnStatistic {
.setDataSize(Math.ceil(dataSize * ratio))
.setMinValue(minValue)
.setMaxValue(maxValue)
- .setHistogram(histogram)
.setMinExpr(minExpr)
.setMaxExpr(maxExpr)
.setSelectivity(newSelectivity)
@@ -194,28 +182,6 @@ public class ColumnStatistic {
return Math.max(this.minValue, other.minValue) <= Math.min(this.maxValue, other.maxValue);
}
- public static Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
- CatalogIf<DatabaseIf<TableIf>> catalogIf = Env.getCurrentEnv().getCatalogMgr().getCatalog(catalogId);
- if (catalogIf == null) {
- return null;
- }
- DatabaseIf<TableIf> db = catalogIf.getDb(dbId).orElse(null);
- if (db == null) {
- return null;
- }
- TableIf tblIf = db.getTable(tblId).orElse(null);
- if (tblIf == null) {
- return null;
- }
- if (idxId != -1) {
- if (tblIf instanceof OlapTable) {
- OlapTable olapTable = (OlapTable) tblIf;
- return olapTable.getIndexIdToMeta().get(idxId).getColumnByName(columnName);
- }
- }
- return tblIf.getColumn(columnName);
- }
-
public ColumnStatistic updateBySelectivity(double selectivity, double rowCount) {
if (isUnKnown) {
return DEFAULT;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
index 4714b3a1bd..57353eb22b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
@@ -27,7 +27,6 @@ public class ColumnStatisticBuilder {
private double dataSize;
private double minValue;
private double maxValue;
- private Histogram histogram;
private double selectivity = 1.0;
private LiteralExpr minExpr;
private LiteralExpr maxExpr;
@@ -45,7 +44,6 @@ public class ColumnStatisticBuilder {
this.dataSize = columnStatistic.dataSize;
this.minValue = columnStatistic.minValue;
this.maxValue = columnStatistic.maxValue;
- this.histogram = columnStatistic.histogram;
this.selectivity = columnStatistic.selectivity;
this.minExpr = columnStatistic.minExpr;
this.maxExpr = columnStatistic.maxExpr;
@@ -87,11 +85,6 @@ public class ColumnStatisticBuilder {
return this;
}
- public ColumnStatisticBuilder setHistogram(Histogram histogram) {
- this.histogram = histogram;
- return this;
- }
-
public ColumnStatisticBuilder setSelectivity(double selectivity) {
this.selectivity = selectivity;
return this;
@@ -140,10 +133,6 @@ public class ColumnStatisticBuilder {
return maxValue;
}
- public Histogram getHistogram() {
- return histogram;
- }
-
public double getSelectivity() {
return selectivity;
}
@@ -162,6 +151,6 @@ public class ColumnStatisticBuilder {
public ColumnStatistic build() {
return new ColumnStatistic(count, ndv, avgSizeByte, numNulls,
- dataSize, minValue, maxValue, histogram, selectivity, minExpr, maxExpr, isUnknown);
+ dataSize, minValue, maxValue, selectivity, minExpr, maxExpr, isUnknown);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java
index 0516c1fabf..fec57ac4c6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Histogram.java
@@ -17,14 +17,15 @@
package org.apache.doris.statistics;
-import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Type;
+import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.statistics.util.StatisticsUtil;
-import com.alibaba.fastjson2.JSON;
-import com.alibaba.fastjson2.JSONArray;
-import com.alibaba.fastjson2.JSONObject;
import com.google.common.collect.Lists;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.parquet.Strings;
@@ -34,71 +35,71 @@ import java.util.List;
public class Histogram {
private static final Logger LOG = LogManager.getLogger(Histogram.class);
- private Type dataType;
+ public final Type dataType;
- private int maxBucketSize;
- private int bucketSize;
- private float sampleRate;
+ public final int maxBucketNum;
- private List<Bucket> buckets;
+ public final int bucketNum;
- public Histogram(Type dataType) {
- this.dataType = dataType;
- }
+ public final double sampleRate;
- public Type getDataType() {
- return dataType;
- }
+ public final List<Bucket> buckets;
- public void setDataType(Type dataType) {
+ public Histogram(Type dataType, int maxBucketNum, int bucketNum,
+ double sampleRate, List<Bucket> buckets) {
this.dataType = dataType;
+ this.maxBucketNum = maxBucketNum;
+ this.bucketNum = bucketNum;
+ this.sampleRate = sampleRate;
+ this.buckets = buckets;
}
- public int getMaxBucketSize() {
- return maxBucketSize;
- }
+ public static Histogram DEFAULT = new HistogramBuilder()
+ .setDataType(Type.INVALID).setMaxBucketNum(1)
+ .setBucketNum(0).setSampleRate(1.0).setBuckets(Lists.newArrayList()).build();
- public void setMaxBucketSize(int maxBucketSize) {
- this.maxBucketSize = maxBucketSize;
- }
+ // TODO: use thrift
+ public static Histogram fromResultRow(ResultRow resultRow) {
+ try {
+ HistogramBuilder histogramBuilder = new HistogramBuilder();
+
+ long catalogId = Long.parseLong(resultRow.getColumnValue("catalog_id"));
+ long idxId = Long.parseLong(resultRow.getColumnValue("idx_id"));
+ long dbId = Long.parseLong(resultRow.getColumnValue("db_id"));
+ long tblId = Long.parseLong(resultRow.getColumnValue("tbl_id"));
+
+ String colName = resultRow.getColumnValue("col_id");
+ Column col = StatisticsUtil.findColumn(catalogId, dbId, tblId, idxId, colName);
+ if (col == null) {
+ LOG.warn("Failed to deserialize histogram statistics, ctlId: {} dbId: {}"
+ + "tblId: {} column: {} not exists",
+ catalogId, dbId, tblId, colName);
+ return Histogram.DEFAULT;
+ }
- public int getBucketSize() {
- return bucketSize;
- }
+ double sampleRate = Double.parseDouble(resultRow.getColumnValue("sample_rate"));
+ histogramBuilder.setSampleRate(sampleRate);
+ histogramBuilder.setDataType(col.getType());
- public void setBucketSize(int bucketSize) {
- this.bucketSize = bucketSize;
- }
+ String json = resultRow.getColumnValue("buckets");
+ JsonObject jsonObj = JsonParser.parseString(json).getAsJsonObject();
- public float getSampleRate() {
- return sampleRate;
- }
+ int maxBucketNum = jsonObj.get("max_bucket_num").getAsInt();
+ histogramBuilder.setMaxBucketNum(maxBucketNum);
- public void setSampleRate(float sampleRate) {
- if (sampleRate < 0f || sampleRate > 1f) {
- this.sampleRate = 1f;
- } else {
- this.sampleRate = sampleRate;
- }
- }
+ int bucketNum = jsonObj.get("bucket_num").getAsInt();
+ histogramBuilder.setBucketNum(bucketNum);
- public void setBuckets(List<Bucket> buckets) {
- this.buckets = buckets;
- }
-
- public List<Bucket> getBuckets() {
- return buckets;
- }
+ JsonArray jsonArray = jsonObj.getAsJsonArray("buckets");
+ List<Bucket> buckets = Bucket.deserializeFromjson(col.getType(), jsonArray);
+ histogramBuilder.setBuckets(buckets);
- public static Histogram defaultHistogram() {
- Type type = Type.fromPrimitiveType(PrimitiveType.INVALID_TYPE);
- List<Bucket> buckets = Lists.newArrayList();
- Histogram histogram = new Histogram(type);
- histogram.setMaxBucketSize(0);
- histogram.setBucketSize(0);
- histogram.setSampleRate(1.0f);
- histogram.setBuckets(buckets);
- return histogram;
+ return histogramBuilder.build();
+ } catch (Exception e) {
+ e.printStackTrace();
+ LOG.warn("Failed to deserialize histogram statistics.", e);
+ return Histogram.DEFAULT;
+ }
}
/**
@@ -111,37 +112,28 @@ public class Histogram {
}
try {
- Histogram histogram = new Histogram(datatype);
- JSONObject histogramJson = JSON.parseObject(json);
-
- List<Bucket> buckets = Lists.newArrayList();
- JSONArray jsonArray = histogramJson.getJSONArray("buckets");
-
- for (int i = 0; i < jsonArray.size(); i++) {
- JSONObject bucketJson = jsonArray.getJSONObject(i);
- Bucket bucket = new Bucket();
- bucket.lower = StatisticsUtil.readableValue(datatype, bucketJson.get("lower").toString());
- bucket.upper = StatisticsUtil.readableValue(datatype, bucketJson.get("upper").toString());
- bucket.count = bucketJson.getIntValue("count");
- bucket.preSum = bucketJson.getIntValue("pre_sum");
- bucket.ndv = bucketJson.getIntValue("ndv");
- buckets.add(bucket);
- }
+ HistogramBuilder histogramBuilder = new HistogramBuilder();
- histogram.setBuckets(buckets);
+ histogramBuilder.setDataType(datatype);
- int maxBucketSize = histogramJson.getIntValue("max_bucket_size");
- histogram.setMaxBucketSize(maxBucketSize);
+ JsonObject histogramJson = JsonParser.parseString(json).getAsJsonObject();
+ JsonArray jsonArray = histogramJson.getAsJsonArray("buckets");
+ List<Bucket> buckets = Bucket.deserializeFromjson(datatype, jsonArray);
- int bucketSize = histogramJson.getIntValue("bucket_size");
- histogram.setBucketSize(bucketSize);
+ histogramBuilder.setBuckets(buckets);
- float sampleRate = histogramJson.getFloatValue("sample_rate");
- histogram.setSampleRate(sampleRate);
+ int maxBucketSize = histogramJson.get("max_bucket_num").getAsInt();
+ histogramBuilder.setMaxBucketNum(maxBucketSize);
- return histogram;
+ int bucketSize = histogramJson.get("bucket_num").getAsInt();
+ histogramBuilder.setBucketNum(bucketSize);
+
+ float sampleRate = histogramJson.get("sample_rate").getAsFloat();
+ histogramBuilder.setSampleRate(sampleRate);
+
+ return histogramBuilder.build();
} catch (Throwable e) {
- LOG.warn("deserialize from json error, input json string: {}", json, e);
+ LOG.error("deserialize from json error.", e);
}
return null;
@@ -155,26 +147,25 @@ public class Histogram {
return "";
}
- JSONObject histogramJson = new JSONObject();
- histogramJson.put("max_bucket_size", histogram.maxBucketSize);
- histogramJson.put("bucket_size", histogram.bucketSize);
- histogramJson.put("sample_rate", histogram.sampleRate);
-
- JSONArray bucketsJsonArray = new JSONArray();
- histogramJson.put("buckets", bucketsJsonArray);
-
- if (histogram.buckets != null) {
- for (Bucket bucket : histogram.buckets) {
- JSONObject bucketJson = new JSONObject();
- bucketJson.put("count", bucket.count);
- bucketJson.put("pre_sum", bucket.preSum);
- bucketJson.put("ndv", bucket.ndv);
- bucketJson.put("upper", bucket.upper.getStringValue());
- bucketJson.put("lower", bucket.lower.getStringValue());
- bucketsJsonArray.add(bucketJson);
- }
+ JsonObject histogramJson = new JsonObject();
+
+ histogramJson.addProperty("max_bucket_num", histogram.maxBucketNum);
+ histogramJson.addProperty("bucket_num", histogram.bucketNum);
+ histogramJson.addProperty("sample_rate", histogram.sampleRate);
+
+ JsonArray bucketsJsonArray = new JsonArray();
+ histogramJson.add("buckets", bucketsJsonArray);
+
+ for (Bucket bucket : histogram.buckets) {
+ JsonObject bucketJson = new JsonObject();
+ bucketJson.addProperty("count", bucket.count);
+ bucketJson.addProperty("pre_sum", bucket.preSum);
+ bucketJson.addProperty("ndv", bucket.ndv);
+ bucketJson.addProperty("upper", bucket.upper.getStringValue());
+ bucketJson.addProperty("lower", bucket.lower.getStringValue());
+ bucketsJsonArray.add(bucketJson);
}
- return histogramJson.toJSONString();
+ return histogramJson.toString();
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java
new file mode 100644
index 0000000000..dcc2299882
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramBuilder.java
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.catalog.Type;
+
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * Mutable builder that collects the fields of a {@link Histogram} and creates the instance in
+ * {@link #build()}. Every setter returns the builder itself so calls can be chained.
+ */
+public class HistogramBuilder {
+    private Type dataType;
+
+    private int maxBucketNum;
+
+    private int bucketNum;
+
+    private double sampleRate;
+
+    private List<Bucket> buckets;
+
+    /** Creates an empty builder; all fields keep their Java default values until they are set. */
+    public HistogramBuilder() {
+    }
+
+    /**
+     * Creates a builder pre-populated with the fields of an existing histogram.
+     * The bucket list is taken over by reference, without copying or re-sorting.
+     */
+    public HistogramBuilder(Histogram histogram) {
+        this.dataType = histogram.dataType;
+        this.maxBucketNum = histogram.maxBucketNum;
+        this.bucketNum = histogram.bucketNum;
+        this.sampleRate = histogram.sampleRate;
+        this.buckets = histogram.buckets;
+    }
+
+    public HistogramBuilder setDataType(Type dataType) {
+        this.dataType = dataType;
+        return this;
+    }
+
+    public HistogramBuilder setMaxBucketNum(int maxBucketNum) {
+        this.maxBucketNum = maxBucketNum;
+        return this;
+    }
+
+    public HistogramBuilder setBucketNum(int bucketNum) {
+        this.bucketNum = bucketNum;
+        return this;
+    }
+
+    /**
+     * Sets the sample rate. Only values in the closed range [0, 1] are stored as given; anything else,
+     * including NaN, falls back to 1.0.
+     */
+    public HistogramBuilder setSampleRate(double sampleRate) {
+        // Written as a positive range check so that NaN (for which every comparison is false)
+        // also ends up in the fallback branch.
+        if (sampleRate >= 0 && sampleRate <= 1.0) {
+            this.sampleRate = sampleRate;
+        } else {
+            this.sampleRate = 1.0;
+        }
+        return this;
+    }
+
+    /**
+     * Sets the buckets, ordered by their lower bound.
+     *
+     * <p>The given list is copied before sorting, so the caller's list is not reordered and may be
+     * unmodifiable; {@code null} is treated as an empty list.
+     */
+    public HistogramBuilder setBuckets(List<Bucket> buckets) {
+        List<Bucket> sorted = new java.util.ArrayList<>();
+        if (buckets != null) {
+            sorted.addAll(buckets);
+        }
+        sorted.sort(Comparator.comparing(Bucket::getLower));
+        this.buckets = sorted;
+        return this;
+    }
+
+    public Type getDataType() {
+        return dataType;
+    }
+
+    public int getMaxBucketNum() {
+        return maxBucketNum;
+    }
+
+    public int getBucketNum() {
+        return bucketNum;
+    }
+
+    public double getSampleRate() {
+        return sampleRate;
+    }
+
+    public List<Bucket> getBuckets() {
+        return buckets;
+    }
+
+    /** Creates a {@link Histogram} from the values currently held by this builder. */
+    public Histogram build() {
+        return new Histogram(dataType, maxBucketNum, bucketNum, sampleRate, buckets);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
new file mode 100644
index 0000000000..cc349e0425
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.catalog.Env;
+import org.apache.doris.common.FeConstants;
+import org.apache.doris.qe.AutoCloseConnectContext;
+import org.apache.doris.qe.StmtExecutor;
+import org.apache.doris.statistics.util.StatisticsUtil;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.text.StringSubstitutor;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Analysis task that collects the histogram of a single column and stores it in the internal
+ * histogram statistics table. Each task analyzes one column.
+ */
+public class HistogramTask extends BaseAnalysisTask {
+
+    /**
+     * INSERT ... SELECT template that computes the histogram over a TABLESAMPLE of the analyzed table,
+     * so that only a sample of the data is read. Placeholders are filled in by {@link #execute()}.
+     */
+    private static final String ANALYZE_HISTOGRAM_SQL_TEMPLATE = "INSERT INTO "
+            + "${internalDB}.${histogramStatTbl} "
+            + "SELECT "
+            + " CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
+            + " ${catalogId} AS catalog_id, "
+            + " ${dbId} AS db_id, "
+            + " ${tblId} AS tbl_id, "
+            + " ${idxId} AS idx_id, "
+            + " '${colId}' AS col_id, "
+            + " ${sampleRate} AS sample_rate, "
+            + " `HISTOGRAM`(`${colName}`, 1, ${maxBucketNum}) AS buckets, "
+            + " NOW() AS create_time "
+            + "FROM "
+            + " `${dbName}`.`${tblName}` TABLESAMPLE (${percentValue} PERCENT)";
+
+    @VisibleForTesting
+    public HistogramTask() {
+        super();
+    }
+
+    public HistogramTask(AnalysisTaskScheduler analysisTaskScheduler, AnalysisTaskInfo info) {
+        super(analysisTaskScheduler, info);
+    }
+
+    /**
+     * Fills in the SQL template for the task's column, runs the statement on a statistics connect
+     * context, and then synchronously refreshes the statistics cache entry of that column.
+     *
+     * @throws Exception if executing the statement fails
+     */
+    @Override
+    public void execute() throws Exception {
+        Map<String, String> params = new HashMap<>();
+        params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
+        params.put("histogramStatTbl", StatisticConstants.HISTOGRAM_TBL_NAME);
+        params.put("catalogId", String.valueOf(catalog.getId()));
+        params.put("dbId", String.valueOf(db.getId()));
+        params.put("tblId", String.valueOf(tbl.getId()));
+        params.put("idxId", "-1");
+        params.put("colId", String.valueOf(info.colName));
+        params.put("dbName", info.dbName);
+        params.put("tblName", String.valueOf(info.tblName));
+        params.put("colName", String.valueOf(info.colName));
+        params.put("sampleRate", String.valueOf(info.sampleRate));
+        params.put("maxBucketNum", String.valueOf(info.maxBucketNum));
+        // Round instead of truncating: in double arithmetic 0.29 * 100 evaluates to 28.999999999999996,
+        // which a plain (int) cast would turn into 28 although 0.29 is persisted as sample_rate.
+        params.put("percentValue", String.valueOf(Math.round(info.sampleRate * 100)));
+
+        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+        String histogramSql = stringSubstitutor.replace(ANALYZE_HISTOGRAM_SQL_TEMPLATE);
+        LOG.info("SQL to collect the histogram:\n {}", histogramSql);
+
+        try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
+            this.stmtExecutor = new StmtExecutor(r.connectContext, histogramSql);
+            this.stmtExecutor.execute();
+        }
+
+        Env.getCurrentEnv().getStatisticsCache().refreshSync(tbl.getId(), -1, col.getName());
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
index af2103f13a..44be5bf078 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
@@ -58,12 +58,12 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
private static final String ANALYZE_PARTITION_SQL_TEMPLATE = "INSERT INTO "
+ "${internalDB}.${columnStatTbl}"
+ " values ('${id}','${catalogId}', '${dbId}', '${tblId}', '-1', '${colId}', '${partId}', "
- + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', NULL, ${dataSize}, '${update_time}')";
+ + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', ${dataSize}, '${update_time}')";
private static final String ANALYZE_TABLE_SQL_TEMPLATE = "INSERT INTO "
+ "${internalDB}.${columnStatTbl}"
+ " values ('${id}','${catalogId}', '${dbId}', '${tblId}', NULL, '${colId}', NULL, "
- + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', NULL, ${dataSize}, '${update_time}')";
+ + "${numRows}, ${ndv}, ${nulls}, '${min}', '${max}', ${dataSize}, '${update_time}')";
@Override
protected void getColumnStatsByMeta() throws Exception {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java
index 4dfc68a946..fcb6abf457 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/IcebergAnalysisTask.java
@@ -52,7 +52,7 @@ public class IcebergAnalysisTask extends HMSAnalysisTask {
private static final String INSERT_TABLE_SQL_TEMPLATE = "INSERT INTO "
+ "${internalDB}.${columnStatTbl}"
+ " values ('${id}','${catalogId}', '${dbId}', '${tblId}', '-1', '${colId}', NULL, "
- + "${numRows}, 0, ${nulls}, '0', '0', NULL, ${dataSize}, '${update_time}')";
+ + "${numRows}, 0, ${nulls}, '0', '0', ${dataSize}, '${update_time}')";
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistic.java
similarity index 51%
copy from fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java
copy to fe/fe-core/src/main/java/org/apache/doris/statistics/Statistic.java
index adcbd27d73..b1ecded2f4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Bucket.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistic.java
@@ -17,52 +17,39 @@
package org.apache.doris.statistics;
-import org.apache.doris.analysis.LiteralExpr;
+public class Statistic {
-public class Bucket {
- public LiteralExpr lower;
- public LiteralExpr upper;
- public int count;
- public int preSum;
- public int ndv;
+ public Histogram histogram;
- public LiteralExpr getLower() {
- return lower;
- }
-
- public void setLower(LiteralExpr lower) {
- this.lower = lower;
- }
-
- public LiteralExpr getUpper() {
- return upper;
- }
-
- public void setUpper(LiteralExpr upper) {
- this.upper = upper;
- }
+ public ColumnStatistic columnStatistic;
- public int getCount() {
- return count;
+ public Statistic() {
}
- public void setCount(int count) {
- this.count = count;
+ public Statistic(Histogram histogram, ColumnStatistic columnStatistic) {
+ this.histogram = histogram;
+ this.columnStatistic = columnStatistic;
}
- public int getPreSum() {
- return preSum;
+ public Histogram getHistogram() {
+ if (histogram != null) {
+ return histogram;
+ }
+ return Histogram.DEFAULT;
}
- public void setPreSum(int preSum) {
- this.preSum = preSum;
+ public void setHistogram(Histogram histogram) {
+ this.histogram = histogram;
}
- public int getNdv() {
- return ndv;
+ public ColumnStatistic getColumnStatistic() {
+ if (columnStatistic != null) {
+ return columnStatistic;
+ }
+ return ColumnStatistic.DEFAULT;
}
- public void setNdv(int ndv) {
- this.ndv = ndv;
+ public void setColumnStatistic(ColumnStatistic columnStatistic) {
+ this.columnStatistic = columnStatistic;
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
index dc89cc42fa..df34c2f9d6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java
@@ -22,6 +22,8 @@ import java.util.concurrent.TimeUnit;
public class StatisticConstants {
public static final String STATISTIC_TBL_NAME = "column_statistics";
+ public static final String HISTOGRAM_TBL_NAME = "histogram_statistics";
+
public static final String ANALYSIS_JOB_TABLE = "analysis_jobs";
public static final int MAX_NAME_LEN = 64;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
index b92873460c..08c58800ac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
@@ -31,7 +31,7 @@ public class StatisticsCache {
private static final Logger LOG = LogManager.getLogger(StatisticsCache.class);
- private final AsyncLoadingCache<StatisticsCacheKey, ColumnStatistic> cache = Caffeine.newBuilder()
+ private final AsyncLoadingCache<StatisticsCacheKey, Statistic> cache = Caffeine.newBuilder()
.maximumSize(StatisticConstants.STATISTICS_RECORDS_CACHE_SIZE)
.expireAfterAccess(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_VALID_DURATION_IN_HOURS))
.refreshAfterWrite(Duration.ofHours(StatisticConstants.STATISTICS_CACHE_REFRESH_INTERVAL))
@@ -42,29 +42,49 @@ public class StatisticsCache {
}
public ColumnStatistic getColumnStatistics(long tblId, long idxId, String colName) {
- if (ConnectContext.get().getSessionVariable().internalSession) {
+ ConnectContext ctx = ConnectContext.get();
+ if (ctx != null && ctx.getSessionVariable().internalSession) {
return ColumnStatistic.DEFAULT;
}
StatisticsCacheKey k = new StatisticsCacheKey(tblId, idxId, colName);
try {
- CompletableFuture<ColumnStatistic> f = cache.get(k);
- if (f.isDone()) {
- return f.get();
+ CompletableFuture<Statistic> f = cache.get(k);
+ if (f.isDone() && f.get() != null) {
+ return f.get().getColumnStatistic();
}
} catch (Exception e) {
LOG.warn("Unexpected exception while returning ColumnStatistic", e);
- return ColumnStatistic.DEFAULT;
}
return ColumnStatistic.DEFAULT;
}
+    /** Convenience overload of {@code getHistogram(long, long, String)} that passes -1 as the index id. */
+    public Histogram getHistogram(long tblId, String colName) {
+        final long idxId = -1;
+        return getHistogram(tblId, idxId, colName);
+    }
+
+    /**
+     * Returns the histogram currently cached for the given column.
+     *
+     * <p>{@link Histogram#DEFAULT} is returned instead when the call comes from an internal session,
+     * when the cache entry has not finished loading, when the loaded value is {@code null}, or when
+     * reading the entry throws (the exception is logged).
+     */
+    public Histogram getHistogram(long tblId, long idxId, String colName) {
+        ConnectContext context = ConnectContext.get();
+        boolean internalSession = context != null && context.getSessionVariable().internalSession;
+        if (internalSession) {
+            return Histogram.DEFAULT;
+        }
+        StatisticsCacheKey cacheKey = new StatisticsCacheKey(tblId, idxId, colName);
+        Histogram result = Histogram.DEFAULT;
+        try {
+            CompletableFuture<Statistic> future = cache.get(cacheKey);
+            // Never block on a load that is still running; fall back to the default instead.
+            if (future.isDone()) {
+                Statistic loaded = future.get();
+                if (loaded != null) {
+                    result = loaded.getHistogram();
+                }
+            }
+        } catch (Exception e) {
+            LOG.warn("Unexpected exception while returning Histogram", e);
+        }
+        return result;
+    }
+
// TODO: finish this method.
public void eraseExpiredCache(long tblId, long idxId, String colName) {
cache.synchronous().invalidate(new StatisticsCacheKey(tblId, idxId, colName));
}
- public void updateCache(long tblId, long idxId, String colName, ColumnStatistic statistic) {
-
+ public void updateCache(long tblId, long idxId, String colName, Statistic statistic) {
cache.synchronous().put(new StatisticsCacheKey(tblId, idxId, colName), statistic);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java
index dea76b6201..08781e5689 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCacheLoader.java
@@ -35,7 +35,7 @@ import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executor;
-public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKey, ColumnStatistic> {
+public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKey, Statistic> {
private static final Logger LOG = LogManager.getLogger(StatisticsCacheLoader.class);
@@ -43,13 +43,17 @@ public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKe
+ "." + StatisticConstants.STATISTIC_TBL_NAME + " WHERE "
+ "id = CONCAT('${tblId}', '-', ${idxId}, '-', '${colId}')";
+ private static final String QUERY_HISTOGRAM_STATISTICS = "SELECT * FROM " + FeConstants.INTERNAL_DB_NAME
+ + "." + StatisticConstants.HISTOGRAM_TBL_NAME + " WHERE "
+ + "id = CONCAT('${tblId}', '-', ${idxId}, '-', '${colId}')";
+
private static int CUR_RUNNING_LOAD = 0;
private static final Object LOCK = new Object();
// TODO: Maybe we should trigger a analyze job when the required ColumnStatistic doesn't exists.
@Override
- public @NonNull CompletableFuture<ColumnStatistic> asyncLoad(@NonNull StatisticsCacheKey key,
+ public @NonNull CompletableFuture<Statistic> asyncLoad(@NonNull StatisticsCacheKey key,
@NonNull Executor executor) {
synchronized (LOCK) {
if (CUR_RUNNING_LOAD > StatisticConstants.LOAD_TASK_LIMITS) {
@@ -61,31 +65,53 @@ public class StatisticsCacheLoader implements AsyncCacheLoader<StatisticsCacheKe
}
CUR_RUNNING_LOAD++;
return CompletableFuture.supplyAsync(() -> {
+ Statistic statistic = new Statistic();
+
try {
Map<String, String> params = new HashMap<>();
params.put("tblId", String.valueOf(key.tableId));
params.put("idxId", String.valueOf(key.idxId));
params.put("colId", String.valueOf(key.colName));
- List<ResultRow> resultBatches =
+
+ List<ColumnStatistic> columnStatistics;
+ List<ResultRow> columnResult =
StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
.replace(QUERY_COLUMN_STATISTICS));
- List<ColumnStatistic> columnStatistics = null;
try {
- columnStatistics = StatisticsUtil.deserializeToColumnStatistics(resultBatches);
+ columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResult);
} catch (Exception e) {
LOG.warn("Failed to deserialize column statistics", e);
throw new CompletionException(e);
}
if (CollectionUtils.isEmpty(columnStatistics)) {
- return ColumnStatistic.DEFAULT;
+ statistic.setColumnStatistic(ColumnStatistic.DEFAULT);
+ } else {
+ statistic.setColumnStatistic(columnStatistics.get(0));
+ }
+
+ List<Histogram> histogramStatistics;
+ List<ResultRow> histogramResult =
+ StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
+ .replace(QUERY_HISTOGRAM_STATISTICS));
+ try {
+ histogramStatistics = StatisticsUtil.deserializeToHistogramStatistics(histogramResult);
+ } catch (Exception e) {
+ LOG.warn("Failed to deserialize histogram statistics", e);
+ throw new CompletionException(e);
+ }
+ if (CollectionUtils.isEmpty(histogramStatistics)) {
+ statistic.setHistogram(Histogram.DEFAULT);
+ } else {
+ statistic.setHistogram(histogramStatistics.get(0));
}
- return columnStatistics.get(0);
} finally {
synchronized (LOCK) {
CUR_RUNNING_LOAD--;
LOCK.notify();
}
}
+
+ return statistic;
});
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 7901c1c918..475c972a22 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -74,8 +74,7 @@ public class StatisticsRepository {
private static final String INSERT_INTO_COLUMN_STATISTICS = "INSERT INTO "
+ FULL_QUALIFIED_COLUMN_STATISTICS_NAME + " VALUES('${id}', ${catalogId}, ${dbId}, ${tblId}, '${idxId}',"
- + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, '${min}', '${max}', "
- + "'${histogram}', ${dataSize}, NOW())";
+ + "'${colId}', ${partId}, ${count}, ${ndv}, ${nullCount}, '${min}', '${max}', ${dataSize}, NOW())";
public static ColumnStatistic queryColumnStatisticsByName(long tableId, String colName) {
ResultRow resultRow = queryColumnStatisticById(tableId, colName);
@@ -159,7 +158,6 @@ public class StatisticsRepository {
String nullCount = alterColumnStatsStmt.getValue(StatsType.NUM_NULLS);
String min = alterColumnStatsStmt.getValue(StatsType.MIN_VALUE);
String max = alterColumnStatsStmt.getValue(StatsType.MAX_VALUE);
- String histogram = alterColumnStatsStmt.getValue(StatsType.HISTOGRAM);
String dataSize = alterColumnStatsStmt.getValue(StatsType.DATA_SIZE);
ColumnStatisticBuilder builder = new ColumnStatisticBuilder();
String colName = alterColumnStatsStmt.getColumnName();
@@ -181,12 +179,10 @@ public class StatisticsRepository {
builder.setMaxExpr(StatisticsUtil.readableValue(column.getType(), max));
builder.setMaxValue(StatisticsUtil.convertToDouble(column.getType(), max));
}
- if (histogram != null) {
- builder.setHistogram(Histogram.deserializeFromJson(column.getType(), histogram));
- }
if (dataSize != null) {
builder.setDataSize(Double.parseDouble(dataSize));
}
+
ColumnStatistic columnStatistic = builder.build();
Map<String, String> params = new HashMap<>();
params.put("id", constructId(objects.table.getId(), -1, colName));
@@ -201,9 +197,17 @@ public class StatisticsRepository {
params.put("nullCount", String.valueOf(columnStatistic.numNulls));
params.put("min", min == null ? "NULL" : min);
params.put("max", max == null ? "NULL" : max);
- params.put("histogram", (columnStatistic.histogram == null) ? "NULL" : histogram);
params.put("dataSize", String.valueOf(columnStatistic.dataSize));
StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params);
- Env.getCurrentEnv().getStatisticsCache().updateCache(objects.table.getId(), -1, colName, builder.build());
+
+ Histogram histogram = Env.getCurrentEnv().getStatisticsCache()
+ .getHistogram(objects.table.getId(), -1, colName);
+
+ Statistic statistic = new Statistic();
+ statistic.setHistogram(histogram);
+ statistic.setColumnStatistic(builder.build());
+
+ Env.getCurrentEnv().getStatisticsCache()
+ .updateCache(objects.table.getId(), -1, colName, statistic);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 08822bb27c..8ceabf4897 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -29,8 +29,10 @@ import org.apache.doris.analysis.StatementBase;
import org.apache.doris.analysis.StringLiteral;
import org.apache.doris.analysis.TableName;
import org.apache.doris.analysis.UserIdentity;
+import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.TableIf;
@@ -47,6 +49,7 @@ import org.apache.doris.qe.SessionVariable;
import org.apache.doris.qe.StmtExecutor;
import org.apache.doris.statistics.AnalysisTaskInfo;
import org.apache.doris.statistics.ColumnStatistic;
+import org.apache.doris.statistics.Histogram;
import org.apache.doris.statistics.StatisticConstants;
import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
import org.apache.doris.system.SystemInfoService;
@@ -107,6 +110,11 @@ public class StatisticsUtil {
return resultBatches.stream().map(ColumnStatistic::fromResultRow).collect(Collectors.toList());
}
+    /** Converts every result row into a {@link Histogram} by calling {@code Histogram.fromResultRow}. */
+    public static List<Histogram> deserializeToHistogramStatistics(List<ResultRow> resultBatches)
+            throws Exception {
+        return resultBatches.stream()
+                .map(row -> Histogram.fromResultRow(row))
+                .collect(Collectors.toList());
+    }
+
public static AutoCloseConnectContext buildConnectContext() {
ConnectContext connectContext = new ConnectContext();
SessionVariable sessionVariable = connectContext.getSessionVariable();
@@ -253,4 +261,26 @@ public class StatisticsUtil {
}
return new DBObjects(catalogIf, databaseIf, tableIf);
}
+
+    /**
+     * Resolves a column by name from the catalog metadata.
+     *
+     * @param catalogId id of the catalog
+     * @param dbId id of the database
+     * @param tblId id of the table
+     * @param idxId id of the index to search; for an {@link OlapTable} any value other than -1 selects
+     *              that index, otherwise the column is looked up on the table itself
+     * @param columnName name of the column
+     * @return the column found by the lookup, or {@code null} when the catalog, database, table or
+     *         requested index does not exist
+     */
+    public static Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
+        CatalogIf<DatabaseIf<TableIf>> catalogIf = Env.getCurrentEnv().getCatalogMgr().getCatalog(catalogId);
+        if (catalogIf == null) {
+            return null;
+        }
+        DatabaseIf<TableIf> db = catalogIf.getDb(dbId).orElse(null);
+        if (db == null) {
+            return null;
+        }
+        TableIf tblIf = db.getTable(tblId).orElse(null);
+        if (tblIf == null) {
+            return null;
+        }
+        if (idxId != -1 && tblIf instanceof OlapTable) {
+            OlapTable olapTable = (OlapTable) tblIf;
+            // An index id that is not registered on the table is reported as "not found", like the
+            // other missing objects above, instead of failing with a NullPointerException.
+            if (!olapTable.getIndexIdToMeta().containsKey(idxId)) {
+                return null;
+            }
+            return olapTable.getIndexIdToMeta().get(idxId).getColumnByName(columnName);
+        }
+        return tblIf.getColumn(columnName);
+    }
}
diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex
index d9c89907de..b66a91004a 100644
--- a/fe/fe-core/src/main/jflex/sql_scanner.flex
+++ b/fe/fe-core/src/main/jflex/sql_scanner.flex
@@ -478,6 +478,7 @@ import org.apache.doris.qe.SqlModeHelper;
keywordMap.put("write", new Integer(SqlParserSymbols.KW_WRITE));
keywordMap.put("year", new Integer(SqlParserSymbols.KW_YEAR));
keywordMap.put("mtmv", new Integer(SqlParserSymbols.KW_MTMV));
+ keywordMap.put("histogram", new Integer(SqlParserSymbols.KW_HISTOGRAM));
}
// map from token id to token description
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java
index 99e6a24338..49e80acccf 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java
@@ -191,7 +191,7 @@ public class HyperGraphBuilder {
int count = rowCounts.get(Integer.parseInt(scanPlan.getTable().getName()));
for (Slot slot : scanPlan.getOutput()) {
slotIdToColumnStats.put(slot.getExprId(),
- new ColumnStatistic(count, count, 0, 0, 0, 0, 0, null, 0, null, null, true));
+ new ColumnStatistic(count, count, 0, 0, 0, 0, 0, 0, null, null, true));
}
StatsDeriveResult stats = new StatsDeriveResult(count, slotIdToColumnStats);
group.setStatistics(stats);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
index a01dd8f7cf..2cb428e402 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java
@@ -82,7 +82,7 @@ public class AnalysisJobTest extends TestWithFeService {
return connectContext;
}
};
- String sql = "ANALYZE t1";
+ String sql = "ANALYZE TABLE t1";
Assertions.assertNotNull(getSqlStmtExecutor(sql));
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
index 96ae8fae9f..f04c846456 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
@@ -32,6 +32,7 @@ import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
@@ -63,14 +64,12 @@ public class CacheTest extends TestWithFeService {
@Test
public void testLoad() throws Exception {
- new MockUp<ColumnStatistic>() {
+ new MockUp<StatisticsUtil>() {
@Mock
public Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
return new Column("abc", PrimitiveType.BIGINT);
}
- };
- new MockUp<StatisticsUtil>() {
@Mock
public List<ResultRow> execStatisticQuery(String sql) {
@@ -91,7 +90,6 @@ public class CacheTest extends TestWithFeService {
colNames.add("col_id");
colNames.add("min");
colNames.add("max");
- colNames.add("histogram");
List<PrimitiveType> primitiveTypes = new ArrayList<>();
primitiveTypes.add(PrimitiveType.BIGINT);
primitiveTypes.add(PrimitiveType.BIGINT);
@@ -103,6 +101,7 @@ public class CacheTest extends TestWithFeService {
primitiveTypes.add(PrimitiveType.VARCHAR);
primitiveTypes.add(PrimitiveType.VARCHAR);
primitiveTypes.add(PrimitiveType.VARCHAR);
+ primitiveTypes.add(PrimitiveType.VARCHAR);
List<String> values = new ArrayList<>();
values.add("1");
values.add("2");
@@ -115,7 +114,6 @@ public class CacheTest extends TestWithFeService {
values.add("8");
values.add("9");
values.add("10");
- values.add("");
ResultRow resultRow = new ResultRow(colNames, primitiveTypes, values);
return Arrays.asList(resultRow);
}
@@ -123,10 +121,78 @@ public class CacheTest extends TestWithFeService {
StatisticsCache statisticsCache = new StatisticsCache();
ColumnStatistic columnStatistic = statisticsCache.getColumnStatistics(0, "col");
Assertions.assertEquals(ColumnStatistic.DEFAULT, columnStatistic);
- Thread.sleep(100);
+ Thread.sleep(1000);
columnStatistic = statisticsCache.getColumnStatistics(0, "col");
Assertions.assertEquals(1, columnStatistic.count);
Assertions.assertEquals(2, columnStatistic.ndv);
Assertions.assertEquals(10, columnStatistic.maxValue);
}
+
+ @Test
+ public void testLoadHistogram() throws Exception {
+ new MockUp<StatisticsUtil>() {
+
+ @Mock
+ public Column findColumn(long catalogId, long dbId, long tblId, long idxId, String columnName) {
+ return new Column("abc", PrimitiveType.DATETIME);
+ }
+
+ @Mock
+ public List<ResultRow> execStatisticQuery(String sql) {
+ try {
+ Thread.sleep(50);
+ } catch (InterruptedException e) {
+ // ignore
+ }
+ List<String> colNames = new ArrayList<>();
+ colNames.add("catalog_id");
+ colNames.add("db_id");
+ colNames.add("idx_id");
+ colNames.add("tbl_id");
+ colNames.add("col_id");
+ colNames.add("sample_rate");
+ colNames.add("buckets");
+ List<PrimitiveType> primitiveTypes = new ArrayList<>();
+ primitiveTypes.add(PrimitiveType.VARCHAR);
+ primitiveTypes.add(PrimitiveType.VARCHAR);
+ primitiveTypes.add(PrimitiveType.VARCHAR);
+ primitiveTypes.add(PrimitiveType.VARCHAR);
+ primitiveTypes.add(PrimitiveType.VARCHAR);
+ primitiveTypes.add(PrimitiveType.VARCHAR);
+ primitiveTypes.add(PrimitiveType.VARCHAR);
+ List<String> values = new ArrayList<>();
+ values.add("1");
+ values.add("2");
+ values.add("3");
+ values.add("-1");
+ values.add("4");
+ values.add("0.2");
+ String buckets = "{\"max_bucket_num\":128,\"bucket_num\":5,\"sample_rate\":1.0,\"buckets\":"
+ + "[{\"lower\":\"2022-09-21 17:30:29\",\"upper\":\"2022-09-21 22:30:29\","
+ + "\"count\":9,\"pre_sum\":0,\"ndv\":1},"
+ + "{\"lower\":\"2022-09-22 17:30:29\",\"upper\":\"2022-09-22 22:30:29\","
+ + "\"count\":10,\"pre_sum\":9,\"ndv\":1},"
+ + "{\"lower\":\"2022-09-23 17:30:29\",\"upper\":\"2022-09-23 22:30:29\","
+ + "\"count\":9,\"pre_sum\":19,\"ndv\":1},"
+ + "{\"lower\":\"2022-09-24 17:30:29\",\"upper\":\"2022-09-24 22:30:29\","
+ + "\"count\":9,\"pre_sum\":28,\"ndv\":1},"
+ + "{\"lower\":\"2022-09-25 17:30:29\",\"upper\":\"2022-09-25 22:30:29\","
+ + "\"count\":9,\"pre_sum\":37,\"ndv\":1}]}";
+ values.add(buckets);
+ ResultRow resultRow = new ResultRow(colNames, primitiveTypes, values);
+ return Collections.singletonList(resultRow);
+ }
+ };
+
+ StatisticsCache statisticsCache = new StatisticsCache();
+ Histogram histogram = statisticsCache.getHistogram(0, "col");
+ Assertions.assertEquals(Histogram.DEFAULT, histogram);
+ Thread.sleep(1000);
+ histogram = statisticsCache.getHistogram(0, "col");
+ Assertions.assertEquals("DATETIME", histogram.dataType.toString());
+ Assertions.assertEquals(128, histogram.maxBucketNum);
+ Assertions.assertEquals(5, histogram.bucketNum);
+ Assertions.assertEquals(0.2, histogram.sampleRate);
+ Assertions.assertEquals(5, histogram.buckets.size());
+ }
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java
index f575872c23..df2546c843 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTest.java
@@ -39,7 +39,7 @@ class HistogramTest {
@BeforeEach
void setUp() throws Exception {
- String json = "{\"max_bucket_size\":128,\"bucket_size\":5,\"sample_rate\":1.0,\"buckets\":"
+ String json = "{\"max_bucket_num\":128,\"bucket_num\":5,\"sample_rate\":1.0,\"buckets\":"
+ "[{\"lower\":\"2022-09-21 17:30:29\",\"upper\":\"2022-09-21 22:30:29\","
+ "\"count\":9,\"pre_sum\":0,\"ndv\":1},"
+ "{\"lower\":\"2022-09-22 17:30:29\",\"upper\":\"2022-09-22 22:30:29\","
@@ -58,19 +58,19 @@ class HistogramTest {
@Test
void testDeserializeFromJson() throws Exception {
- Type dataType = histogramUnderTest.getDataType();
+ Type dataType = histogramUnderTest.dataType;
Assertions.assertTrue(dataType.isDatetime());
- int maxBucketSize = histogramUnderTest.getMaxBucketSize();
+ int maxBucketSize = histogramUnderTest.maxBucketNum;
Assertions.assertEquals(128, maxBucketSize);
- int bucketSize = histogramUnderTest.getBucketSize();
+ int bucketSize = histogramUnderTest.bucketNum;
Assertions.assertEquals(5, bucketSize);
- float sampleRate = histogramUnderTest.getSampleRate();
+ double sampleRate = histogramUnderTest.sampleRate;
Assertions.assertEquals(1.0, sampleRate);
- List<Bucket> buckets = histogramUnderTest.getBuckets();
+ List<Bucket> buckets = histogramUnderTest.buckets;
Assertions.assertEquals(5, buckets.size());
LiteralExpr expectedLower = LiteralExpr.create("2022-09-21 17:30:29",
@@ -97,10 +97,10 @@ class HistogramTest {
String json = Histogram.serializeToJson(histogramUnderTest);
JSONObject histogramJson = JSON.parseObject(json);
- int maxBucketSize = histogramJson.getIntValue("max_bucket_size");
+ int maxBucketSize = histogramJson.getIntValue("max_bucket_num");
Assertions.assertEquals(128, maxBucketSize);
- int bucketSize = histogramJson.getIntValue("bucket_size");
+ int bucketSize = histogramJson.getIntValue("bucket_num");
Assertions.assertEquals(5, bucketSize);
float sampleRate = histogramJson.getFloat("sample_rate");
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/MVStatisticsTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/MVStatisticsTest.java
index 7ed261c831..6040731a14 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/MVStatisticsTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/MVStatisticsTest.java
@@ -79,7 +79,7 @@ public class MVStatisticsTest extends TestWithFeService {
};
AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
Deencapsulation.setField(analysisManager, "statisticsCache", statisticsCache);
- getSqlStmtExecutor("analyze t1");
+ getSqlStmtExecutor("analyze table t1");
Thread.sleep(3000);
}
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java
index df4f48858a..0f2cebf511 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java
@@ -27,7 +27,7 @@ public class StatsDeriveResultTest {
public void testUpdateRowCountByLimit() {
StatsDeriveResult stats = new StatsDeriveResult(100);
ColumnStatistic a = new ColumnStatistic(100, 10, 1, 5, 10,
- 1, 100, null, 0.5, null, null, false);
+ 1, 100, 0.5, null, null, false);
Id id = new Id(1);
stats.addColumnStats(id, a);
StatsDeriveResult res = stats.updateByLimit(0);
diff --git a/regression-test/suites/statistics/alter_col_stats.groovy b/regression-test/suites/statistics/alter_col_stats.groovy
index 52714b9dfa..958e3341d4 100644
--- a/regression-test/suites/statistics/alter_col_stats.groovy
+++ b/regression-test/suites/statistics/alter_col_stats.groovy
@@ -33,7 +33,8 @@ suite("alter_column_stats") {
sql """INSERT INTO statistics_test VALUES(2, 'b', '2012-01-01')"""
sql """INSERT INTO statistics_test VALUES(3, 'c', '2013-01-01')"""
- sql """ANALYZE statistics_test"""
+ sql """ANALYZE TABLE statistics_test"""
+ sql """ANALYZE TABLE statistics_test UPDATE HISTOGRAM ON col1,col2"""
sleep(9000)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org