You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/10/09 08:34:27 UTC

[doris] branch master updated: [enhancement](statistics) optimize the default configuration related to statistics, etc. (#13136)

This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 15fc3c2c89 [enhancement](statistics) optimize the default configuration related to statistics, etc. (#13136)
15fc3c2c89 is described below

commit 15fc3c2c896e5a2949be7f47cc4802e1077c93d8
Author: ElvinWei <zh...@outlook.com>
AuthorDate: Sun Oct 9 16:34:20 2022 +0800

    [enhancement](statistics) optimize the default configuration related to statistics, etc. (#13136)
    
    This PR mainly optimizes statistics tasks. It includes the following:
    1. No longer generate statistics tasks for empty tables, and move the logic of skipping empty partitions to the process of task generation.
    2. Adjust the default configuration related to statistics to improve the efficiency of statistics collection. The affected parameters are `cbo_concurrency_statistics_task_num`, `statistic_job_scheduler_execution_interval_ms` and `statistic_task_scheduler_execution_interval_ms`.
    3. Optimize the display of statistical tasks.
    4. In addition, some `org.apache.parquet.Strings` imports are changed to `com.google.common.base.Strings` to avoid the exception that `Strings` cannot be found when debugging locally.
    
    etc.
---
 .../org/apache/doris/analysis/AnalyzeStmt.java     | 20 ++++------------
 .../clone/ColocateTableCheckerAndBalancer.java     |  2 +-
 .../main/java/org/apache/doris/common/Config.java  |  6 ++---
 .../java/org/apache/doris/policy/PolicyMgr.java    |  2 +-
 .../org/apache/doris/statistics/StatisticsJob.java |  8 ++++++-
 .../doris/statistics/StatisticsJobManager.java     |  6 +++++
 .../doris/statistics/StatisticsJobScheduler.java   | 28 +++++++++++++++-------
 .../apache/doris/statistics/StatisticsTask.java    |  2 +-
 .../statistics/StatisticsJobSchedulerTest.java     | 17 +++++++++----
 9 files changed, 56 insertions(+), 35 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
index 2da3cf70c0..3b6004aeb3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java
@@ -43,6 +43,7 @@ import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import org.apache.commons.lang.StringUtils;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -150,11 +151,11 @@ public class AnalyzeStmt extends DdlStmt {
             try {
                 OlapTable olapTable = (OlapTable) table;
                 List<String> partitionNames = getPartitionNames();
-                if (partitionNames.isEmpty() && olapTable.isPartitioned()) {
-                    partitionNames.addAll(olapTable.getPartitionNames());
+                List<String> newPartitionNames = new ArrayList<>(partitionNames);
+                if (newPartitionNames.isEmpty() && olapTable.isPartitioned()) {
+                    newPartitionNames.addAll(olapTable.getPartitionNames());
                 }
-                List<String> notEmptyPartition = getNotEmptyPartition(olapTable, partitionNames);
-                tableIdToPartitionName.put(table.getId(), notEmptyPartition);
+                tableIdToPartitionName.put(table.getId(), newPartitionNames);
             } finally {
                 table.readUnlock();
             }
@@ -332,17 +333,6 @@ public class AnalyzeStmt extends DdlStmt {
         optProperties.put(CBO_STATISTICS_TASK_TIMEOUT_SEC, String.valueOf(taskTimeout));
     }
 
-    private List<String> getNotEmptyPartition(OlapTable olapTable, List<String> partitionNames) {
-        List<String> notEmptyPartition = Lists.newArrayList();
-        for (String partitionName : partitionNames) {
-            Partition partition = olapTable.getPartition(partitionName);
-            if (partition != null && partition.getDataSize() > 0) {
-                notEmptyPartition.add(partitionName);
-            }
-        }
-        return notEmptyPartition;
-    }
-
     @Override
     public String toSql() {
         StringBuilder sb = new StringBuilder();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java
index c7848b3937..490743a67f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java
@@ -41,13 +41,13 @@ import org.apache.doris.system.Backend;
 import org.apache.doris.system.SystemInfoService;
 
 import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.google.common.collect.Table;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.apache.parquet.Strings;
 
 import java.util.List;
 import java.util.Map;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
index 14aa70d0ce..9d05f54679 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java
@@ -1639,7 +1639,7 @@ public class Config extends ConfigBase {
      */
     // TODO change it to mutable true
     @ConfField(mutable = false, masterOnly = true)
-    public static int cbo_concurrency_statistics_task_num = 1;
+    public static int cbo_concurrency_statistics_task_num = 10;
     /*
      * default sample percentage
      * The value from 0 ~ 100. The 100 means no sampling and fetch all data.
@@ -1788,8 +1788,8 @@ public class Config extends ConfigBase {
     public static int be_exec_version = max_be_exec_version;
 
     @ConfField(mutable = false)
-    public static int statistic_job_scheduler_execution_interval_ms = 60 * 60 * 1000;
+    public static int statistic_job_scheduler_execution_interval_ms = 60 * 1000;
 
     @ConfField(mutable = false)
-    public static int statistic_task_scheduler_execution_interval_ms = 60 * 60 * 1000;
+    public static int statistic_task_scheduler_execution_interval_ms = 60 * 1000;
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java b/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java
index 9571452bf8..e360b98cc4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/policy/PolicyMgr.java
@@ -33,13 +33,13 @@ import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.ShowResultSet;
 
 import com.google.common.base.Joiner;
+import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.google.gson.annotations.SerializedName;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.apache.parquet.Strings;
 
 import java.io.DataInput;
 import java.io.DataOutput;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJob.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJob.java
index e233d433bd..c369da43b9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJob.java
@@ -35,6 +35,7 @@ import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
 import java.text.SimpleDateFormat;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -338,10 +339,15 @@ public class StatisticsJob {
             }
         }
 
+        // exclude invalid info
+        if (scope.isEmpty()) {
+            return Collections.emptyList();
+        }
+
         result.add(StringUtils.join(scope.toArray(), ","));
         result.add(finishedTaskNum + "/" + totalTaskNum);
 
-        if (totalTaskNum == finishedTaskNum) {
+        if (totalTaskNum > 0 && totalTaskNum == finishedTaskNum) {
             result.add("FINISHED");
         } else {
             result.add(jobState.toString());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java
index 3bb74af732..37966dc1fe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java
@@ -158,6 +158,9 @@ public class StatisticsJobManager {
                 }
                 if (jobState == null || jobState == statisticsJob.getJobState()) {
                     List<Comparable> showInfo = statisticsJob.getShowInfo(null);
+                    if (showInfo == null || showInfo.isEmpty()) {
+                        continue;
+                    }
                     results.add(showInfo);
                 }
             }
@@ -176,6 +179,9 @@ public class StatisticsJobManager {
                         set.retainAll(tblIds);
                         for (long tblId : set) {
                             List<Comparable> showInfo = statisticsJob.getShowInfo(tblId);
+                            if (showInfo == null || showInfo.isEmpty()) {
+                                continue;
+                            }
                             results.add(showInfo);
                         }
                     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
index 006076b4a0..4e492d6e30 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobScheduler.java
@@ -147,15 +147,15 @@ public class StatisticsJobScheduler extends MasterDaemon {
 
         for (Long tblId : tblIds) {
             Optional<Table> optionalTbl = db.getTable(tblId);
-            if (!optionalTbl.isPresent()) {
-                LOG.warn("Table(id={}) not found in the database {}", tblId, db.getFullName());
-                continue;
-            }
-            Table table = optionalTbl.get();
-            if (!table.isPartitioned()) {
-                getStatsTaskByTable(job, tblId);
+            if (optionalTbl.isPresent()) {
+                Table table = optionalTbl.get();
+                if (!table.isPartitioned()) {
+                    getStatsTaskByTable(job, tblId);
+                } else {
+                    getStatsTaskByPartition(job, tblId);
+                }
             } else {
-                getStatsTaskByPartition(job, tblId);
+                LOG.warn("Table(id={}) not found in the database {}", tblId, db.getFullName());
             }
         }
     }
@@ -171,6 +171,11 @@ public class StatisticsJobScheduler extends MasterDaemon {
         Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(job.getDbId());
         OlapTable table = (OlapTable) db.getTableOrDdlException(tableId);
 
+        if (table.getDataSize() == 0) {
+            LOG.info("Do not collect statistics for empty table {}", table.getName());
+            return;
+        }
+
         Map<Long, List<String>> tblIdToColName = job.getTableIdToColumnName();
         List<String> colNames = tblIdToColName.get(tableId);
 
@@ -202,7 +207,7 @@ public class StatisticsJobScheduler extends MasterDaemon {
         for (String colName : colNames) {
             Column column = table.getColumn(colName);
             if (column == null) {
-                LOG.info("column {} not found in table {}", colName, table.getName());
+                LOG.info("Column {} not found in table {}", colName, table.getName());
                 continue;
             }
             Type colType = column.getType();
@@ -325,6 +330,11 @@ public class StatisticsJobScheduler extends MasterDaemon {
                 LOG.info("Partition {} not found in the table {}", partitionName, table.getName());
                 continue;
             }
+            if (partition.getDataSize() == 0) {
+                LOG.info("Do not collect statistics for empty partition {} in the table {}",
+                        partitionName, table.getName());
+                continue;
+            }
 
             long partitionId = partition.getId();
             long rowCount = partition.getBaseIndex().getRowCount();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTask.java
index 8d386f724c..9428c17df1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsTask.java
@@ -135,7 +135,7 @@ public abstract class StatisticsTask implements Callable<StatisticsTaskResult> {
             throw new DdlException(errorMsg + taskState + " to " + newState);
         }
 
-        LOG.info("Statistics job(id={}) state changed from {} to {}", id, taskState, newState);
+        LOG.info("Statistics task(id={}) state changed from {} to {}", id, taskState, newState);
         taskState = newState;
     }
 
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java
index 717fd7aa14..e715910af0 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobSchedulerTest.java
@@ -71,10 +71,12 @@ public class StatisticsJobSchedulerTest {
         // Setup
         Column col1 = new Column("c1", PrimitiveType.STRING);
         Column col2 = new Column("c2", PrimitiveType.INT);
-        OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2), KeysType.AGG_KEYS,
-                new PartitionInfo(), new HashDistributionInfo());
-        OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2), KeysType.DUP_KEYS,
-                new PartitionInfo(), new HashDistributionInfo());
+
+        OlapTable tbl1 = new OlapTable(0L, "tbl1", Arrays.asList(col1, col2),
+                KeysType.AGG_KEYS, new PartitionInfo(), new HashDistributionInfo());
+        OlapTable tbl2 = new OlapTable(1L, "tbl2", Arrays.asList(col1, col2),
+                KeysType.DUP_KEYS, new PartitionInfo(), new HashDistributionInfo());
+
         Database database = new Database(0L, "db");
         database.createTable(tbl1);
         database.createTable(tbl2);
@@ -95,6 +97,13 @@ public class StatisticsJobSchedulerTest {
             }
         };
 
+        new MockUp<OlapTable>(OlapTable.class) {
+            @Mock
+            public long getDataSize() {
+                return 1L;
+            }
+        };
+
         // Run the test
         statisticsJobSchedulerUnderTest.runAfterCatalogReady();
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org