Posted to commits@doris.apache.org by mo...@apache.org on 2023/06/10 04:28:59 UTC

[doris] branch master updated: [Improvement](statistics)Support external table partition statistics (#20415)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 87bc405c41 [Improvement](statistics)Support external table partition statistics (#20415)
87bc405c41 is described below

commit 87bc405c41f622a438fd3fb8ab6311d85867d23c
Author: Jibing-Li <64...@users.noreply.github.com>
AuthorDate: Sat Jun 10 12:28:53 2023 +0800

    [Improvement](statistics)Support external table partition statistics (#20415)
    
    Support collecting statistics for HMS external tables on specific partitions. Add a session variable to limit the number of partitions sampled when collecting whole-table row count and column statistics.
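
    For illustration, a sketch of the extended syntax (catalog, table,
    partition and column names here are hypothetical):

        ANALYZE TABLE hive.tpch100.orders PARTITIONS (`dt=2023-06-01`, `dt=2023-06-02`) (o_orderkey);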
---
 .../org/apache/doris/common/FeMetaVersion.java     |   4 +-
 fe/fe-core/src/main/cup/sql_parser.cup             |   4 +-
 .../org/apache/doris/analysis/AnalyzeTblStmt.java  |  31 +++
 .../doris/catalog/external/HMSExternalTable.java   |  25 ++-
 .../java/org/apache/doris/qe/SessionVariable.java  |  14 ++
 .../org/apache/doris/statistics/AnalysisInfo.java  | 146 ++++++------
 .../doris/statistics/AnalysisInfoBuilder.java      |  28 ++-
 .../apache/doris/statistics/AnalysisManager.java   |  32 ++-
 .../apache/doris/statistics/ColumnStatistic.java   |   7 +-
 .../apache/doris/statistics/HiveAnalysisTask.java  | 250 ++++++++++++++-------
 10 files changed, 374 insertions(+), 167 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java b/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java
index e06de15db0..b1e42d343a 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/FeMetaVersion.java
@@ -64,9 +64,11 @@ public final class FeMetaVersion {
     public static final int VERSION_121 = 121;
     // For IndexChangeJob
     public static final int VERSION_122 = 122;
+    // For AnalysisInfo
+    public static final int VERSION_123 = 123;
 
     // note: when increment meta version, should assign the latest version to VERSION_CURRENT
-    public static final int VERSION_CURRENT = VERSION_122;
+    public static final int VERSION_CURRENT = VERSION_123;
 
     // all logs meta version should >= the minimum version, so that we could remove many if clause, for example
     // if (FE_METAVERSION < VERSION_94) ...
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup
index cf95aea327..37f187ac85 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -2791,7 +2791,7 @@ show_create_reporitory_stmt ::=
 // analyze statement
 analyze_stmt ::=
     // statistics
-    KW_ANALYZE KW_TABLE table_name:tbl opt_col_list:cols
+    KW_ANALYZE KW_TABLE table_name:tbl opt_partition_names:partitions opt_col_list:cols
       opt_with_analysis_properties:withAnalysisProperties opt_properties:properties
     {:
         if (properties == null) {
@@ -2805,7 +2805,7 @@ analyze_stmt ::=
             properties.put("analysis.type", "FUNDAMENTALS");
         }
         AnalyzeProperties analyzeProperties= new AnalyzeProperties(properties);
-        RESULT = new AnalyzeTblStmt(tbl, cols, analyzeProperties);
+        RESULT = new AnalyzeTblStmt(tbl, partitions, cols, analyzeProperties);
     :}
     | KW_ANALYZE KW_DATABASE ident:ctlName DOT ident:dbName
           opt_with_analysis_properties:withAnalysisProperties opt_properties:properties
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
index 793902b327..01888461ae 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
@@ -24,6 +24,7 @@ import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.catalog.View;
+import org.apache.doris.catalog.external.HMSExternalTable;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.ErrorCode;
@@ -47,12 +48,14 @@ import java.util.stream.Collectors;
 /**
  * Column Statistics Collection Syntax:
  * ANALYZE [ SYNC ] TABLE table_name
+ * [ PARTITIONS (partition_name [, ...])]
  * [ (column_name [, ...]) ]
  * [ [WITH SYNC] | [WITH INCREMENTAL] | [WITH SAMPLE PERCENT | ROWS ] ]
  * [ PROPERTIES ('key' = 'value', ...) ];
  * <p>
  * Column histogram collection syntax:
  * ANALYZE [ SYNC ] TABLE table_name
+ * [ PARTITIONS (partition_name [, ...])]
  * [ (column_name [, ...]) ]
  * UPDATE HISTOGRAM
  * [ [ WITH SYNC ][ WITH INCREMENTAL ][ WITH SAMPLE PERCENT | ROWS ][ WITH BUCKETS ] ]
@@ -64,6 +67,7 @@ import java.util.stream.Collectors;
  * - sample percent | rows:Collect statistics by sampling. Scale and number of rows can be sampled.
  * - buckets:Specifies the maximum number of buckets generated when collecting histogram statistics.
  * - table_name: The purpose table for collecting statistics. Can be of the form `db_name.table_name`.
+ * - partition_name: The specified destination partition must be a partition that exists in `table_name`, and multiple partition names are separated by commas.
  * - column_name: The specified destination column must be a column that exists in `table_name`,
  * and multiple column names are separated by commas.
  * - properties:Properties used to set statistics tasks. Currently only the following configurations
@@ -79,16 +83,19 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
 
     private final TableName tableName;
     private List<String> columnNames;
+    private List<String> partitionNames;
 
     // after analyzed
     private long dbId;
     private TableIf table;
 
     public AnalyzeTblStmt(TableName tableName,
+            PartitionNames partitionNames,
             List<String> columnNames,
             AnalyzeProperties properties) {
         super(properties);
         this.tableName = tableName;
+        this.partitionNames = partitionNames == null ? null : partitionNames.getPartitionNames();
         this.columnNames = columnNames;
         this.analyzeProperties = properties;
     }
@@ -212,6 +219,30 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
                 .stream().map(Column::getName).collect(Collectors.toSet()) : Sets.newHashSet(columnNames);
     }
 
+    public Set<String> getPartitionNames() {
+        Set<String> partitions = partitionNames == null ? table.getPartitionNames() : Sets.newHashSet(partitionNames);
+        if (isSamplingPartition()) {
+            int partNum = ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
+            partitions = partitions.stream().limit(partNum).collect(Collectors.toSet());
+        }
+        return partitions;
+    }
+
+    public boolean isPartitionOnly() {
+        return partitionNames != null;
+    }
+
+    public boolean isSamplingPartition() {
+        if (!(table instanceof HMSExternalTable) || partitionNames != null) {
+            return false;
+        }
+        int partNum = ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
+        return partNum != -1 && table.getPartitionNames().size() > partNum;
+    }
+
     @Override
     public RedirectStatus getRedirectStatus() {
         return RedirectStatus.FORWARD_NO_SYNC;
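
For illustration of the partition-selection logic added above: an explicit
PARTITIONS clause makes isPartitionOnly() true and disables sampling, while
an HMS table analyzed without one is sampled once it has more partitions
than the session limit. A sketch (names and values are hypothetical):

    -- explicit partitions: analyze exactly these two
    ANALYZE TABLE hive.tpch100.orders PARTITIONS (`dt=2023-06-01`, `dt=2023-06-02`);

    -- no PARTITIONS clause: getPartitionNames() keeps at most 10 partitions
    SET external_table_analyze_part_num = 10;
    ANALYZE TABLE hive.tpch100.orders (o_orderkey);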
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index 9270ada4ee..98c941e987 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -18,18 +18,18 @@
 package org.apache.doris.catalog.external;
 
 import org.apache.doris.catalog.Column;
-import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.HiveMetaStoreClientHelper;
 import org.apache.doris.catalog.HudiUtils;
 import org.apache.doris.catalog.Type;
-import org.apache.doris.common.Config;
+import org.apache.doris.common.DdlException;
 import org.apache.doris.datasource.HMSExternalCatalog;
 import org.apache.doris.datasource.hive.PooledHiveMetaStoreClient;
 import org.apache.doris.statistics.AnalysisInfo;
 import org.apache.doris.statistics.BaseAnalysisTask;
-import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.HiveAnalysisTask;
 import org.apache.doris.statistics.IcebergAnalysisTask;
+import org.apache.doris.statistics.StatisticsRepository;
+import org.apache.doris.statistics.TableStatistic;
 import org.apache.doris.thrift.THiveTable;
 import org.apache.doris.thrift.TTableDescriptor;
 import org.apache.doris.thrift.TTableType;
@@ -47,6 +47,7 @@ import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -352,6 +353,14 @@ public class HMSExternalTable extends ExternalTable {
         return client.getPartition(dbName, name, partitionValues);
     }
 
+    @Override
+    public Set<String> getPartitionNames() {
+        makeSureInitialized();
+        PooledHiveMetaStoreClient client = ((HMSExternalCatalog) catalog).getClient();
+        List<String> names = client.listPartitionNames(dbName, name);
+        return new HashSet<>(names);
+    }
+
     @Override
     public List<Column> initSchema() {
         makeSureInitialized();
@@ -387,13 +396,11 @@ public class HMSExternalTable extends ExternalTable {
 
     @Override
     public long estimatedRowCount() {
-        ColumnStatistic cache = Config.enable_stats
-                ? Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(id, "")
-                : ColumnStatistic.UNKNOWN;
-        if (cache.isUnKnown) {
+        try {
+            TableStatistic tableStatistic = StatisticsRepository.fetchTableLevelStats(id);
+            return tableStatistic.rowCount;
+        } catch (DdlException e) {
             return 1;
-        } else {
-            return (long) cache.count;
         }
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 8fb2ac6763..e2897516f7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -348,6 +348,8 @@ public class SessionVariable implements Serializable, Writable {
 
     public static final String IGNORE_COMPLEX_TYPE_COLUMN = "ignore_column_with_complex_type";
 
+    public static final String EXTERNAL_TABLE_ANALYZE_PART_NUM = "external_table_analyze_part_num";
+
     public static final List<String> DEBUG_VARIABLES = ImmutableList.of(
             SKIP_DELETE_PREDICATE,
             SKIP_DELETE_BITMAP,
@@ -953,6 +955,14 @@ public class SessionVariable implements Serializable, Writable {
             needForward = true)
     public boolean enableOrcLazyMat = true;
 
+    @VariableMgr.VarAttr(
+            name = EXTERNAL_TABLE_ANALYZE_PART_NUM,
+            description = {"收集外表统计信息行数时选取的采样分区数,默认-1表示全部分区",
+                    "Number of sample partition for collecting external table line number, "
+                            + "default -1 means all partitions"},
+            needForward = false)
+    public int externalTableAnalyzePartNum = -1;
+
     @VariableMgr.VarAttr(
             name = INLINE_CTE_REFERENCED_THRESHOLD
     )
@@ -1899,6 +1909,10 @@ public class SessionVariable implements Serializable, Writable {
         return showUserDefaultRole;
     }
 
+    public int getExternalTableAnalyzePartNum() {
+        return externalTableAnalyzePartNum;
+    }
+
     /**
      * Serialize to thrift object.
      * Used for rest api.
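
A usage sketch for the new session variable (the value 100 is arbitrary; the
default -1 keeps the old analyze-all-partitions behavior):

    SET external_table_analyze_part_num = 100;
    SHOW VARIABLES LIKE 'external_table_analyze_part_num';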
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index dea2155199..5a00e3471e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -17,12 +17,16 @@
 
 package org.apache.doris.statistics;
 
+import org.apache.doris.catalog.Env;
+import org.apache.doris.common.FeMetaVersion;
 import org.apache.doris.common.io.Text;
 import org.apache.doris.common.io.Writable;
+import org.apache.doris.persist.gson.GsonUtils;
 import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.gson.Gson;
+import com.google.gson.annotations.SerializedName;
 import com.google.gson.reflect.TypeToken;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -34,7 +38,6 @@ import java.lang.reflect.Type;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
-import java.util.Map.Entry;
 import java.util.Set;
 import java.util.StringJoiner;
 
@@ -71,62 +74,94 @@ public class AnalysisInfo implements Writable {
         AUTOMATIC
     }
 
+    @SerializedName("jobId")
     public final long jobId;
 
+    @SerializedName("taskId")
     public final long taskId;
 
+    @SerializedName("catalogName")
     public final String catalogName;
 
+    @SerializedName("dbName")
     public final String dbName;
 
+    @SerializedName("tblName")
     public final String tblName;
 
+    @SerializedName("colToPartitions")
     public final Map<String, Set<String>> colToPartitions;
 
+    @SerializedName("partitionNames")
+    public final Set<String> partitionNames;
+
+    @SerializedName("colName")
     public final String colName;
 
+    @SerializedName("indexId")
     public final long indexId;
 
+    @SerializedName("jobType")
     public final JobType jobType;
 
+    @SerializedName("analysisMode")
     public final AnalysisMode analysisMode;
 
+    @SerializedName("analysisMethod")
     public final AnalysisMethod analysisMethod;
 
+    @SerializedName("analysisType")
     public final AnalysisType analysisType;
 
+    @SerializedName("samplePercent")
     public final int samplePercent;
 
+    @SerializedName("sampleRows")
     public final int sampleRows;
 
+    @SerializedName("maxBucketNum")
     public final int maxBucketNum;
 
+    @SerializedName("periodTimeInMs")
     public final long periodTimeInMs;
 
     // finished or failed
+    @SerializedName("lastExecTimeInMs")
     public long lastExecTimeInMs;
 
+    @SerializedName("state")
     public AnalysisState state;
 
+    @SerializedName("scheduleType")
     public final ScheduleType scheduleType;
 
+    @SerializedName("message")
     public String message;
 
     // True means this task is a table level task for external table.
     // This kind of task is mainly to collect the number of rows of a table.
+    @SerializedName("externalTableLevelTask")
     public boolean externalTableLevelTask;
 
+    @SerializedName("partitionOnly")
+    public boolean partitionOnly;
+
+    @SerializedName("samplingPartition")
+    public boolean samplingPartition;
+
     public AnalysisInfo(long jobId, long taskId, String catalogName, String dbName, String tblName,
-            Map<String, Set<String>> colToPartitions, String colName, Long indexId, JobType jobType,
-            AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType,
+            Map<String, Set<String>> colToPartitions, Set<String> partitionNames, String colName, Long indexId,
+            JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType,
             int samplePercent, int sampleRows, int maxBucketNum, long periodTimeInMs, String message,
-            long lastExecTimeInMs, AnalysisState state, ScheduleType scheduleType, boolean isExternalTableLevelTask) {
+            long lastExecTimeInMs, AnalysisState state, ScheduleType scheduleType, boolean isExternalTableLevelTask,
+            boolean partitionOnly, boolean samplingPartition) {
         this.jobId = jobId;
         this.taskId = taskId;
         this.catalogName = catalogName;
         this.dbName = dbName;
         this.tblName = tblName;
         this.colToPartitions = colToPartitions;
+        this.partitionNames = partitionNames;
         this.colName = colName;
         this.indexId = indexId;
         this.jobType = jobType;
@@ -142,6 +177,8 @@ public class AnalysisInfo implements Writable {
         this.state = state;
         this.scheduleType = scheduleType;
         this.externalTableLevelTask = isExternalTableLevelTask;
+        this.partitionOnly = partitionOnly;
+        this.samplingPartition = samplingPartition;
     }
 
     @Override
@@ -257,71 +294,50 @@ public class AnalysisInfo implements Writable {
 
     @Override
     public void write(DataOutput out) throws IOException {
-        out.writeLong(jobId);
-        out.writeLong(taskId);
-        Text.writeString(out, catalogName);
-        Text.writeString(out, dbName);
-        Text.writeString(out, tblName);
-        out.writeInt(colToPartitions.size());
-        for (Entry<String, Set<String>> entry : colToPartitions.entrySet()) {
-            Text.writeString(out, entry.getKey());
-            out.writeInt(entry.getValue().size());
-            for (String part : entry.getValue()) {
-                Text.writeString(out, part);
-            }
-        }
-        Text.writeString(out, colName);
-        out.writeLong(indexId);
-        Text.writeString(out, jobType.toString());
-        Text.writeString(out, analysisMode.toString());
-        Text.writeString(out, analysisMethod.toString());
-        Text.writeString(out, analysisType.toString());
-        out.writeInt(samplePercent);
-        out.writeInt(sampleRows);
-        out.writeInt(maxBucketNum);
-        out.writeLong(periodTimeInMs);
-        out.writeLong(lastExecTimeInMs);
-        Text.writeString(out, state.toString());
-        Text.writeString(out, scheduleType.toString());
-        Text.writeString(out, message);
-        out.writeBoolean(externalTableLevelTask);
+        String json = GsonUtils.GSON.toJson(this);
+        Text.writeString(out, json);
     }
 
     public static AnalysisInfo read(DataInput dataInput) throws IOException {
-        AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
-        analysisInfoBuilder.setJobId(dataInput.readLong());
-        long taskId = dataInput.readLong();
-        analysisInfoBuilder.setTaskId(taskId);
-        analysisInfoBuilder.setCatalogName(Text.readString(dataInput));
-        analysisInfoBuilder.setDbName(Text.readString(dataInput));
-        analysisInfoBuilder.setTblName(Text.readString(dataInput));
-        int size = dataInput.readInt();
-        Map<String, Set<String>> colToPartitions = new HashMap<>();
-        for (int i = 0; i < size; i++) {
-            String k = Text.readString(dataInput);
-            int partSize = dataInput.readInt();
-            Set<String> parts = new HashSet<>();
-            for (int j = 0; j < partSize; j++) {
-                parts.add(Text.readString(dataInput));
+        if (Env.getCurrentEnvJournalVersion() < FeMetaVersion.VERSION_123) {
+            AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder();
+            analysisInfoBuilder.setJobId(dataInput.readLong());
+            long taskId = dataInput.readLong();
+            analysisInfoBuilder.setTaskId(taskId);
+            analysisInfoBuilder.setCatalogName(Text.readString(dataInput));
+            analysisInfoBuilder.setDbName(Text.readString(dataInput));
+            analysisInfoBuilder.setTblName(Text.readString(dataInput));
+            int size = dataInput.readInt();
+            Map<String, Set<String>> colToPartitions = new HashMap<>();
+            for (int i = 0; i < size; i++) {
+                String k = Text.readString(dataInput);
+                int partSize = dataInput.readInt();
+                Set<String> parts = new HashSet<>();
+                for (int j = 0; j < partSize; j++) {
+                    parts.add(Text.readString(dataInput));
+                }
+                colToPartitions.put(k, parts);
             }
-            colToPartitions.put(k, parts);
+            analysisInfoBuilder.setColToPartitions(colToPartitions);
+            analysisInfoBuilder.setColName(Text.readString(dataInput));
+            analysisInfoBuilder.setIndexId(dataInput.readLong());
+            analysisInfoBuilder.setJobType(JobType.valueOf(Text.readString(dataInput)));
+            analysisInfoBuilder.setAnalysisMode(AnalysisMode.valueOf(Text.readString(dataInput)));
+            analysisInfoBuilder.setAnalysisMethod(AnalysisMethod.valueOf(Text.readString(dataInput)));
+            analysisInfoBuilder.setAnalysisType(AnalysisType.valueOf(Text.readString(dataInput)));
+            analysisInfoBuilder.setSamplePercent(dataInput.readInt());
+            analysisInfoBuilder.setSampleRows(dataInput.readInt());
+            analysisInfoBuilder.setMaxBucketNum(dataInput.readInt());
+            analysisInfoBuilder.setPeriodTimeInMs(dataInput.readLong());
+            analysisInfoBuilder.setLastExecTimeInMs(dataInput.readLong());
+            analysisInfoBuilder.setState(AnalysisState.valueOf(Text.readString(dataInput)));
+            analysisInfoBuilder.setScheduleType(ScheduleType.valueOf(Text.readString(dataInput)));
+            analysisInfoBuilder.setMessage(Text.readString(dataInput));
+            analysisInfoBuilder.setExternalTableLevelTask(dataInput.readBoolean());
+            return analysisInfoBuilder.build();
+        } else {
+            String json = Text.readString(dataInput);
+            return GsonUtils.GSON.fromJson(json, AnalysisInfo.class);
         }
-        analysisInfoBuilder.setColToPartitions(colToPartitions);
-        analysisInfoBuilder.setColName(Text.readString(dataInput));
-        analysisInfoBuilder.setIndexId(dataInput.readLong());
-        analysisInfoBuilder.setJobType(JobType.valueOf(Text.readString(dataInput)));
-        analysisInfoBuilder.setAnalysisMode(AnalysisMode.valueOf(Text.readString(dataInput)));
-        analysisInfoBuilder.setAnalysisMethod(AnalysisMethod.valueOf(Text.readString(dataInput)));
-        analysisInfoBuilder.setAnalysisType(AnalysisType.valueOf(Text.readString(dataInput)));
-        analysisInfoBuilder.setSamplePercent(dataInput.readInt());
-        analysisInfoBuilder.setSampleRows(dataInput.readInt());
-        analysisInfoBuilder.setMaxBucketNum(dataInput.readInt());
-        analysisInfoBuilder.setPeriodTimeInMs(dataInput.readLong());
-        analysisInfoBuilder.setLastExecTimeInMs(dataInput.readLong());
-        analysisInfoBuilder.setState(AnalysisState.valueOf(Text.readString(dataInput)));
-        analysisInfoBuilder.setScheduleType(ScheduleType.valueOf(Text.readString(dataInput)));
-        analysisInfoBuilder.setMessage(Text.readString(dataInput));
-        analysisInfoBuilder.setExternalTableLevelTask(dataInput.readBoolean());
-        return analysisInfoBuilder.build();
     }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index c47b1dc7ab..a14f262edc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -33,6 +33,7 @@ public class AnalysisInfoBuilder {
     private String dbName;
     private String tblName;
     private Map<String, Set<String>> colToPartitions;
+    private Set<String> partitionNames;
     private String colName;
     private long indexId = -1L;
     private JobType jobType;
@@ -48,6 +49,8 @@ public class AnalysisInfoBuilder {
     private ScheduleType scheduleType;
     private String message = "";
     private boolean externalTableLevelTask;
+    private boolean partitionOnly;
+    private boolean samplingPartition;
 
     public AnalysisInfoBuilder() {
     }
@@ -59,6 +62,7 @@ public class AnalysisInfoBuilder {
         dbName = info.dbName;
         tblName = info.tblName;
         colToPartitions = info.colToPartitions;
+        partitionNames = info.partitionNames;
         colName = info.colName;
         indexId = info.indexId;
         jobType = info.jobType;
@@ -73,6 +77,9 @@ public class AnalysisInfoBuilder {
         lastExecTimeInMs = info.lastExecTimeInMs;
         state = info.state;
         scheduleType = info.scheduleType;
+        externalTableLevelTask = info.externalTableLevelTask;
+        partitionOnly = info.partitionOnly;
+        samplingPartition = info.samplingPartition;
     }
 
     public AnalysisInfoBuilder setJobId(long jobId) {
@@ -110,6 +117,11 @@ public class AnalysisInfoBuilder {
         return this;
     }
 
+    public AnalysisInfoBuilder setPartitionNames(Set<String> partitionNames) {
+        this.partitionNames = partitionNames;
+        return this;
+    }
+
     public AnalysisInfoBuilder setIndexId(Long indexId) {
         this.indexId = indexId;
         return this;
@@ -180,11 +192,21 @@ public class AnalysisInfoBuilder {
         return this;
     }
 
+    public AnalysisInfoBuilder setPartitionOnly(boolean isPartitionOnly) {
+        this.partitionOnly = isPartitionOnly;
+        return this;
+    }
+
+    public AnalysisInfoBuilder setSamplingPartition(boolean samplingPartition) {
+        this.samplingPartition = samplingPartition;
+        return this;
+    }
+
     public AnalysisInfo build() {
-        return new AnalysisInfo(jobId, taskId, catalogName, dbName, tblName, colToPartitions,
+        return new AnalysisInfo(jobId, taskId, catalogName, dbName, tblName, colToPartitions, partitionNames,
                 colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent,
                 sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, state, scheduleType,
-                externalTableLevelTask);
+                externalTableLevelTask, partitionOnly, samplingPartition);
     }
 
     public AnalysisInfoBuilder copy() {
@@ -209,6 +231,6 @@ public class AnalysisInfoBuilder {
                 .setLastExecTimeInMs(lastExecTimeInMs)
                 .setState(state)
                 .setScheduleType(scheduleType)
-                .setExternalTableLevelTask(false);
+                .setExternalTableLevelTask(externalTableLevelTask);
     }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 789478d501..18fcb8d82e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -34,6 +34,7 @@ import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.catalog.TableIf.TableType;
 import org.apache.doris.catalog.View;
+import org.apache.doris.catalog.external.HMSExternalTable;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.DdlException;
@@ -296,9 +297,8 @@ public class AnalysisManager extends Daemon implements Writable {
      * TODO Supports incremental collection of statistics from materialized views
      */
     private Map<String, Set<String>> validateAndGetPartitions(TableIf table, Set<String> columnNames,
-            AnalysisType analysisType, AnalysisMode analysisMode) throws DdlException {
+            Set<String> partitionNames, AnalysisType analysisType, AnalysisMode analysisMode) throws DdlException {
         long tableId = table.getId();
-        Set<String> partitionNames = table.getPartitionNames();
 
         Map<String, Set<String>> columnToPartitions = columnNames.stream()
                 .collect(Collectors.toMap(
@@ -312,6 +312,13 @@ public class AnalysisManager extends Daemon implements Writable {
             return columnToPartitions;
         }
 
+        if (table instanceof HMSExternalTable) {
+            // TODO: Currently, we do not support INCREMENTAL collection for external tables.
+            // One reason is that an external table partition id cannot be converted to a Long value.
+            // Will solve this problem later.
+            return columnToPartitions;
+        }
+
         // Get the partition granularity statistics that have been collected
         Map<String, Set<Long>> existColAndPartsForStats = StatisticsRepository
                 .fetchColAndPartsForStats(tableId);
@@ -365,6 +372,9 @@ public class AnalysisManager extends Daemon implements Writable {
         String tblName = tbl.getTbl();
         TableIf table = stmt.getTable();
         Set<String> columnNames = stmt.getColumnNames();
+        Set<String> partitionNames = stmt.getPartitionNames();
+        boolean partitionOnly = stmt.isPartitionOnly();
+        boolean isSamplingPartition = stmt.isSamplingPartition();
         int samplePercent = stmt.getSamplePercent();
         int sampleRows = stmt.getSampleRows();
         AnalysisType analysisType = stmt.getAnalysisType();
@@ -381,6 +391,9 @@ public class AnalysisManager extends Daemon implements Writable {
             stringJoiner.add(colName);
         }
         taskInfoBuilder.setColName(stringJoiner.toString());
+        taskInfoBuilder.setPartitionNames(partitionNames);
+        taskInfoBuilder.setPartitionOnly(partitionOnly);
+        taskInfoBuilder.setSamplingPartition(isSamplingPartition);
         taskInfoBuilder.setJobType(JobType.MANUAL);
         taskInfoBuilder.setState(AnalysisState.PENDING);
         taskInfoBuilder.setAnalysisType(analysisType);
@@ -406,8 +419,8 @@ public class AnalysisManager extends Daemon implements Writable {
             taskInfoBuilder.setPeriodTimeInMs(periodTimeInMs);
         }
 
-        Map<String, Set<String>> colToPartitions = validateAndGetPartitions(table,
-                columnNames, analysisType, analysisMode);
+        Map<String, Set<String>> colToPartitions = validateAndGetPartitions(table, columnNames,
+                partitionNames, analysisType, analysisMode);
         taskInfoBuilder.setColToPartitions(colToPartitions);
 
         return taskInfoBuilder.build();
@@ -433,8 +446,8 @@ public class AnalysisManager extends Daemon implements Writable {
         try {
             TableIf table = StatisticsUtil
                     .findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName);
-            Map<String, Set<String>> colToPartitions = validateAndGetPartitions(table,
-                    jobInfo.colToPartitions.keySet(), jobInfo.analysisType, jobInfo.analysisMode);
+            Map<String, Set<String>> colToPartitions = validateAndGetPartitions(table, jobInfo.colToPartitions.keySet(),
+                    jobInfo.partitionNames, jobInfo.analysisType, jobInfo.analysisMode);
             taskInfoBuilder.setColToPartitions(colToPartitions);
         } catch (Throwable e) {
             throw new RuntimeException(e);
@@ -547,6 +560,10 @@ public class AnalysisManager extends Daemon implements Writable {
         AnalysisInfo analysisInfo = colTaskInfoBuilder.setIndexId(-1L)
                 .setTaskId(taskId).setExternalTableLevelTask(true).build();
         analysisTasks.put(taskId, createTask(analysisInfo));
+        if (isSync) {
+            // For a sync job, there is no need to persist it; return here and execute immediately.
+            return;
+        }
         try {
             logCreateAnalysisJob(analysisInfo);
         } catch (Exception e) {
@@ -604,7 +621,8 @@ public class AnalysisManager extends Daemon implements Writable {
             updateOlapTableStats(table, params);
         }
 
-        // TODO support external table
+        // External tables don't collect table stats here.
+        // We create tasks for external tables to collect table/partition level statistics.
     }
 
     @SuppressWarnings("rawtypes")
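
To illustrate the sync path handled above (table and column names are
hypothetical): a WITH SYNC job runs immediately in the current session and
is not written to the edit log, while the asynchronous form is persisted so
it can be replayed:

    ANALYZE TABLE hive.tpch100.orders (o_orderkey) WITH SYNC;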
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 5735c01430..8aecb8bb4e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -147,9 +147,10 @@ public class ColumnStatistic {
             String colName = resultRow.getColumnValue("col_id");
             Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName);
             if (col == null) {
-                // Col is null indicates this information is external table level info,
-                // which doesn't have a column.
-                return columnStatisticBuilder.build();
+                LOG.warn("Failed to deserialize column statistics, ctlId: {} dbId: {} "
+                        + "tblId: {} column: {} does not exist",
+                        catalogId, dbID, tblId, colName);
+                return ColumnStatistic.UNKNOWN;
             }
             String min = resultRow.getColumnValue("min");
             String max = resultRow.getColumnValue("max");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
index 0e358857ca..be1fd516af 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java
@@ -20,13 +20,14 @@ package org.apache.doris.statistics;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.util.TimeUtils;
-import org.apache.doris.datasource.HMSExternalCatalog;
 import org.apache.doris.qe.AutoCloseConnectContext;
 import org.apache.doris.qe.StmtExecutor;
 import org.apache.doris.statistics.util.InternalQueryResult;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.text.StringSubstitutor;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
@@ -50,6 +51,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 public class HiveAnalysisTask extends HMSAnalysisTask {
     private static final Logger LOG = LogManager.getLogger(HiveAnalysisTask.class);
@@ -83,10 +85,16 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
             + "FROM `${catalogName}`.`${dbName}`.`${tblName}`";
 
     private final boolean isTableLevelTask;
+    private final boolean isSamplingPartition;
+    private final boolean isPartitionOnly;
+    private final Set<String> partitionNames;
 
     public HiveAnalysisTask(AnalysisInfo info) {
         super(info);
         isTableLevelTask = info.externalTableLevelTask;
+        isSamplingPartition = info.samplingPartition;
+        isPartitionOnly = info.partitionOnly;
+        partitionNames = info.partitionNames;
     }
 
     private static final String ANALYZE_META_TABLE_COLUMN_TEMPLATE = "INSERT INTO "
@@ -109,54 +117,144 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
      */
     @Override
     protected void getStatsBySql() throws Exception {
-        getTableStatsBySql();
-        getPartitionStatsBySql();
-        getTableColumnStatsBySql();
-        getPartitionColumnStatsBySql();
+        if (isTableLevelTask) {
+            getTableStatsBySql();
+        } else {
+            getTableColumnStatsBySql();
+        }
     }
 
     /**
      * Get table row count and insert the result to __internal_schema.table_statistics
      */
     private void getTableStatsBySql() throws Exception {
-        Map<String, String> params = buildTableStatsParams();
-        List<InternalQueryResult.ResultRow> columnResult =
-                StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
-                .replace(ANALYZE_TABLE_COUNT_TEMPLATE));
-        String rowCount = columnResult.get(0).getColumnValue("rowCount");
-        params.put("rowCount", rowCount);
-        StatisticsRepository.persistTableStats(params);
+        // Get table level information. An example sql for table stats:
+        // INSERT INTO __internal_schema.table_statistics VALUES
+        //   ('13055', 13002, 13038, 13055, -1, 'NULL', 5, 1686111064658, NOW())
+        Map<String, String> parameters = table.getRemoteTable().getParameters();
+        if (isPartitionOnly) {
+            for (String partId : partitionNames) {
+                StringBuilder sb = new StringBuilder();
+                sb.append(ANALYZE_TABLE_COUNT_TEMPLATE);
+                sb.append(" where ");
+                String[] splits = partId.split("/");
+                for (int i = 0; i < splits.length; i++) {
+                    String value = splits[i].split("=")[1];
+                    splits[i] = splits[i].replace(value, "\'" + value + "\'");
+                }
+                sb.append(StringUtils.join(splits, " and "));
+                Map<String, String> params = buildTableStatsParams(partId);
+                setParameterData(parameters, params);
+                List<InternalQueryResult.ResultRow> columnResult =
+                        StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
+                        .replace(sb.toString()));
+                String rowCount = columnResult.get(0).getColumnValue("rowCount");
+                params.put("rowCount", rowCount);
+                StatisticsRepository.persistTableStats(params);
+            }
+        } else {
+            Map<String, String> params = buildTableStatsParams("NULL");
+            List<InternalQueryResult.ResultRow> columnResult =
+                    StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
+                    .replace(ANALYZE_TABLE_COUNT_TEMPLATE));
+            String rowCount = columnResult.get(0).getColumnValue("rowCount");
+            params.put("rowCount", rowCount);
+            StatisticsRepository.persistTableStats(params);
+        }
     }
 
     /**
      * Get column statistics and insert the result to __internal_schema.column_statistics
      */
     private void getTableColumnStatsBySql() throws Exception {
-        Map<String, String> params = buildTableStatsParams();
-        params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
-        params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME);
-        params.put("colName", col.getName());
-        params.put("colId", info.colName);
-        params.put("dataSizeFunction", getDataSizeFunction(col));
-        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
-        String sql = stringSubstitutor.replace(ANALYZE_SQL_TABLE_TEMPLATE);
-        try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
-            r.connectContext.getSessionVariable().disableNereidsPlannerOnce();
-            this.stmtExecutor = new StmtExecutor(r.connectContext, sql);
-            this.stmtExecutor.execute();
+        // An example sql for column stats:
+        // INSERT INTO __internal_schema.column_statistics
+        //   SELECT CONCAT(13055, '-', -1, '-', 'r_regionkey') AS id,
+        //   13002 AS catalog_id,
+        //   13038 AS db_id,
+        //   13055 AS tbl_id,
+        //   -1 AS idx_id,
+        //   'r_regionkey' AS col_id,
+        //   'NULL' AS part_id,
+        //   COUNT(1) AS row_count,
+        //   NDV(`r_regionkey`) AS ndv,
+        //   SUM(CASE WHEN `r_regionkey` IS NULL THEN 1 ELSE 0 END) AS null_count,
+        //   MIN(`r_regionkey`) AS min,
+        //   MAX(`r_regionkey`) AS max,
+        //   0 AS data_size,
+        //   NOW() FROM `hive`.`tpch100`.`region`
+        if (isPartitionOnly) {
+            for (String partId : partitionNames) {
+                StringBuilder sb = new StringBuilder();
+                sb.append(ANALYZE_SQL_TABLE_TEMPLATE);
+                sb.append(" where ");
+                String[] splits = partId.split("/");
+                for (int i = 0; i < splits.length; i++) {
+                    String value = splits[i].split("=")[1];
+                    splits[i] = splits[i].replace(value, "\'" + value + "\'");
+                }
+                sb.append(StringUtils.join(splits, " and "));
+                Map<String, String> params = buildTableStatsParams(partId);
+                params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
+                params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME);
+                params.put("colName", col.getName());
+                params.put("colId", info.colName);
+                params.put("dataSizeFunction", getDataSizeFunction(col));
+                StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+                String sql = stringSubstitutor.replace(sb.toString());
+                try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
+                    r.connectContext.getSessionVariable().disableNereidsPlannerOnce();
+                    this.stmtExecutor = new StmtExecutor(r.connectContext, sql);
+                    this.stmtExecutor.execute();
+                }
+            }
+        } else {
+            StringBuilder sb = new StringBuilder();
+            sb.append(ANALYZE_SQL_TABLE_TEMPLATE);
+            if (isSamplingPartition) {
+                sb.append(" where 1=1 ");
+                String[] splitExample = partitionNames.stream().findFirst().get().split("/");
+                int parts = splitExample.length;
+                List<String> partNames = new ArrayList<>();
+                for (String split : splitExample) {
+                    partNames.add(split.split("=")[0]);
+                }
+                List<List<String>> valueLists = new ArrayList<>();
+                for (int i = 0; i < parts; i++) {
+                    valueLists.add(new ArrayList<>());
+                }
+                for (String partId : partitionNames) {
+                    String[] partIds = partId.split("/");
+                    for (int i = 0; i < partIds.length; i++) {
+                        valueLists.get(i).add("\'" + partIds[i].split("=")[1] + "\'");
+                    }
+                }
+                for (int i = 0; i < parts; i++) {
+                    sb.append(" and ");
+                    sb.append(partNames.get(i));
+                    sb.append(" in (");
+                    sb.append(StringUtils.join(valueLists.get(i), ","));
+                    sb.append(") ");
+                }
+            }
+            Map<String, String> params = buildTableStatsParams("NULL");
+            params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
+            params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME);
+            params.put("colName", col.getName());
+            params.put("colId", info.colName);
+            params.put("dataSizeFunction", getDataSizeFunction(col));
+            StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+            String sql = stringSubstitutor.replace(sb.toString());
+            try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
+                r.connectContext.getSessionVariable().disableNereidsPlannerOnce();
+                this.stmtExecutor = new StmtExecutor(r.connectContext, sql);
+                this.stmtExecutor.execute();
+            }
+            Env.getCurrentEnv().getStatisticsCache().refreshColStatsSync(tbl.getId(), -1, col.getName());
         }
-        Env.getCurrentEnv().getStatisticsCache().refreshColStatsSync(tbl.getId(), -1, col.getName());
-    }
-
-    private void getPartitionStatsBySql() {
-        // TODO: Collect partition stats by sql.
     }
 
-    private void getPartitionColumnStatsBySql() {
-        // TODO: Collect partition column stats by sql.
-    }
-
-    private Map<String, String> buildTableStatsParams() {
+    private Map<String, String> buildTableStatsParams(String partId) {
         Map<String, String> commonParams = new HashMap<>();
         commonParams.put("id", String.valueOf(tbl.getId()));
         commonParams.put("catalogId", String.valueOf(catalog.getId()));
@@ -164,11 +262,13 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
         commonParams.put("tblId", String.valueOf(tbl.getId()));
         commonParams.put("indexId", "-1");
         commonParams.put("idxId", "-1");
-        commonParams.put("partId", "NULL");
+        commonParams.put("partId", "\'" + partId + "\'");
         commonParams.put("catalogName", catalog.getName());
         commonParams.put("dbName", db.getFullName());
         commonParams.put("tblName", tbl.getName());
-        commonParams.put("type", col.getType().toString());
+        if (col != null) {
+            commonParams.put("type", col.getType().toString());
+        }
         commonParams.put("lastAnalyzeTimeInMs", String.valueOf(System.currentTimeMillis()));
         return commonParams;
     }
@@ -183,28 +283,20 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
     }
 
     protected void getTableStatsByMeta() throws Exception {
-        Map<String, String> params = new HashMap<>();
-        params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
-        params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME);
-        params.put("catalogId", String.valueOf(catalog.getId()));
-        params.put("dbId", String.valueOf(db.getId()));
-        params.put("tblId", String.valueOf(tbl.getId()));
-        params.put("colId", "");
-
         // Get table level information.
         Map<String, String> parameters = table.getRemoteTable().getParameters();
-        // Collect table level row count, null number and timestamp.
-        setParameterData(parameters, params);
-        if (parameters.containsKey(TOTAL_SIZE)) {
-            params.put("dataSize", parameters.get(TOTAL_SIZE));
-        }
-        params.put("id", genColumnStatId(tbl.getId(), -1, "", null));
-        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
-        String sql = stringSubstitutor.replace(ANALYZE_META_TABLE_TEMPLATE);
-        try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
-            r.connectContext.getSessionVariable().disableNereidsPlannerOnce();
-            this.stmtExecutor = new StmtExecutor(r.connectContext, sql);
-            this.stmtExecutor.execute();
+        if (isPartitionOnly) {
+            for (String partId : partitionNames) {
+                Map<String, String> params = buildTableStatsParams(partId);
+                // Collect table level row count, null number and timestamp.
+                setParameterData(parameters, params);
+                StatisticsRepository.persistTableStats(params);
+            }
+        } else {
+            Map<String, String> params = buildTableStatsParams("NULL");
+            // Collect table level row count, null number and timestamp.
+            setParameterData(parameters, params);
+            StatisticsRepository.persistTableStats(params);
         }
     }
 
@@ -221,31 +313,34 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
 
         // Get table level information.
         Map<String, String> parameters = table.getRemoteTable().getParameters();
-        // Collect table level row count, null number and timestamp.
-        setParameterData(parameters, params);
-        params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(), null));
-        List<ColumnStatisticsObj> tableStats = table.getHiveTableColumnStats(columns);
-        long rowCount = parameters.containsKey(NUM_ROWS) ? Long.parseLong(parameters.get(NUM_ROWS)) : 0;
-        // Collect table level ndv, nulls, min and max. tableStats contains at most 1 item;
-        for (ColumnStatisticsObj tableStat : tableStats) {
-            if (!tableStat.isSetStatsData()) {
-                continue;
+        long rowCount;
+        StringSubstitutor stringSubstitutor;
+        if (isPartitionOnly) {
+            // Collect table level row count, null number and timestamp.
+            setParameterData(parameters, params);
+            params.put("id", genColumnStatId(tbl.getId(), -1, col.getName(), null));
+            List<ColumnStatisticsObj> tableStats = table.getHiveTableColumnStats(columns);
+            rowCount = parameters.containsKey(NUM_ROWS) ? Long.parseLong(parameters.get(NUM_ROWS)) : 0;
+            // Collect table level ndv, nulls, min and max. tableStats contains at most 1 item;
+            for (ColumnStatisticsObj tableStat : tableStats) {
+                if (!tableStat.isSetStatsData()) {
+                    continue;
+                }
+                ColumnStatisticsData data = tableStat.getStatsData();
+                getStatData(data, params, rowCount);
+            }
+            stringSubstitutor = new StringSubstitutor(params);
+            String sql = stringSubstitutor.replace(ANALYZE_META_TABLE_COLUMN_TEMPLATE);
+            try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
+                r.connectContext.getSessionVariable().disableNereidsPlannerOnce();
+                this.stmtExecutor = new StmtExecutor(r.connectContext, sql);
+                this.stmtExecutor.execute();
             }
-            ColumnStatisticsData data = tableStat.getStatsData();
-            getStatData(data, params, rowCount);
-        }
-        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
-        String sql = stringSubstitutor.replace(ANALYZE_META_TABLE_COLUMN_TEMPLATE);
-        try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
-            r.connectContext.getSessionVariable().disableNereidsPlannerOnce();
-            this.stmtExecutor = new StmtExecutor(r.connectContext, sql);
-            this.stmtExecutor.execute();
         }
 
         // Get partition level information.
-        List<String> partitions = ((HMSExternalCatalog)
-                catalog).getClient().listPartitionNames(db.getFullName(), table.getName());
-        Map<String, List<ColumnStatisticsObj>> columnStats = table.getHivePartitionColumnStats(partitions, columns);
+        Map<String, List<ColumnStatisticsObj>> columnStats
+                = table.getHivePartitionColumnStats(Lists.newArrayList(partitionNames), columns);
         List<String> partitionAnalysisSQLs = new ArrayList<>();
         for (Map.Entry<String, List<ColumnStatisticsObj>> entry : columnStats.entrySet()) {
             String partName = entry.getKey();
@@ -368,6 +463,7 @@ public class HiveAnalysisTask extends HMSAnalysisTask {
             timestamp = parameters.get(TIMESTAMP);
         }
         params.put("numRows", numRows);
+        params.put("rowCount", numRows);
         params.put("update_time", TimeUtils.DATETIME_FORMAT.format(
                 LocalDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(timestamp) * 1000),
                         ZoneId.systemDefault())));

