You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/04/10 15:24:54 UTC
[GitHub] [incubator-doris] morrySnow commented on a diff in pull request #8861: [feature-wip](statistics) step4: collect statistics by implementing statistics tasks

morrySnow commented on code in PR #8861:
URL: https://github.com/apache/incubator-doris/pull/8861#discussion_r846795617


##########
fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobManager.java:
##########
@@ -19,55 +19,155 @@
 
 import org.apache.doris.analysis.AnalyzeStmt;
 import org.apache.doris.catalog.Catalog;
+import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.Table;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.Config;
+import org.apache.doris.common.DdlException;
+import org.apache.doris.common.ErrorCode;
+import org.apache.doris.common.ErrorReport;
+import org.apache.doris.common.UserException;
 
+import com.google.common.base.Strings;
 import com.google.common.collect.Maps;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 
-/*
-For unified management of statistics job,
-including job addition, cancellation, scheduling, etc.
+/**
+ * For unified management of statistics job,
+ * including job addition, cancellation, scheduling, etc.
  */
 public class StatisticsJobManager {
     private static final Logger LOG = LogManager.getLogger(StatisticsJobManager.class);
 
-    // statistics job
-    private Map<Long, StatisticsJob> idToStatisticsJob = Maps.newConcurrentMap();
+    /**
+     * save statistics job status information
+     */
+    private final Map<Long, StatisticsJob> idToStatisticsJob = Maps.newConcurrentMap();
 
-    public void createStatisticsJob(AnalyzeStmt analyzeStmt) {
-        // step0: init statistics job by analyzeStmt
+    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true);
+
+    public void readLock() {
+        this.lock.readLock().lock();
+    }
+
+    public void readUnlock() {
+        this.lock.readLock().unlock();
+    }
+
+    private void writeLock() {
+        this.lock.writeLock().lock();
+    }
+
+    private void writeUnlock() {
+        this.lock.writeLock().unlock();
+    }
+
+    public Map<Long, StatisticsJob> getIdToStatisticsJob() {
+        return this.idToStatisticsJob;
+    }
+
+    public void createStatisticsJob(AnalyzeStmt analyzeStmt) throws UserException {
+        // step1: init statistics job by analyzeStmt
         StatisticsJob statisticsJob = StatisticsJob.fromAnalyzeStmt(analyzeStmt);
-        // step1: get statistics to be analyzed
-        Set<Long> tableIdList = statisticsJob.relatedTableId();
-        // step2: check restrict
-        checkRestrict(tableIdList);
-        // step3: check permission
-        checkPermission();
-        // step4: create it
-        createStatisticsJob(statisticsJob);
+        writeLock();
+        try {
+            // step2: check restrict
+            this.checkRestrict(analyzeStmt.getDb(), statisticsJob.getTblIds());
+            // step3: create it
+            this.createStatisticsJob(statisticsJob);
+        } finally {
+            writeUnlock();
+        }
     }
 
-    public void createStatisticsJob(StatisticsJob statisticsJob) {
-        idToStatisticsJob.put(statisticsJob.getId(), statisticsJob);
+    public void createStatisticsJob(StatisticsJob statisticsJob) throws DdlException {
+        // assign the id when the job is ready to run
+        statisticsJob.setId(Catalog.getCurrentCatalog().getNextId());
+        this.idToStatisticsJob.put(statisticsJob.getId(), statisticsJob);
         try {
             Catalog.getCurrentCatalog().getStatisticsJobScheduler().addPendingJob(statisticsJob);
         } catch (IllegalStateException e) {
             LOG.info("The pending statistics job is full. Please submit it again later.");
+            throw new DdlException("The pending statistics job is full, Please submit it again later.");
         }
     }
 
-    // Rule1: The same table cannot have two unfinished statistics jobs
-    // Rule2: The unfinished statistics job could not more then Config.max_statistics_job_num
-    // Rule3: The job for external table is not supported
-    private void checkRestrict(Set<Long> tableIdList) {
-        // TODO
+    /**
+     * The statistical job has the following restrict:
+     * - Rule1: The same table cannot have two unfinished statistics jobs
+     * - Rule2: The unfinished statistics job could not more then Config.max_statistics_job_num

Review Comment:
   typo: then -> than



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java:
##########
@@ -17,21 +17,84 @@
 
 package org.apache.doris.statistics;
 
+import org.apache.doris.catalog.Catalog;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.Table;
+import org.apache.doris.common.DdlException;
+
+import com.google.common.collect.Maps;
+
 import java.util.List;
+import java.util.Map;
 
 /*
 A statistics task that directly collects statistics by reading FE meta.
  */
 public class MetaStatisticsTask extends StatisticsTask {
 
-    public MetaStatisticsTask(long jobId, StatsGranularityDesc granularityDesc,
-                              StatsCategoryDesc categoryDesc, List<StatsType> statsTypeList) {
+    public MetaStatisticsTask(long jobId,
+                              StatsGranularityDesc granularityDesc,
+                              StatsCategoryDesc categoryDesc,
+                              List<StatsType> statsTypeList) {
         super(jobId, granularityDesc, categoryDesc, statsTypeList);
     }
 
     @Override
     public StatisticsTaskResult call() throws Exception {
-        // TODO
-        return null;
+        Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
+        List<StatsType> statsTypeList = this.getStatsTypeList();
+
+        for (StatsType statsType : statsTypeList) {
+            switch (statsType) {
+                case ROW_COUNT:
+                    getRowCount(statsType, statsTypeToValue);
+                    break;
+                case DATA_SIZE:
+                    getDataSize(statsType, statsTypeToValue);
+                    break;
+                case MAX_SIZE:
+                case AVG_SIZE:

Review Comment:
   Why do MAX_SIZE and AVG_SIZE share the same case branch?



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/MetaStatisticsTask.java:
##########
@@ -17,21 +17,84 @@
 
 package org.apache.doris.statistics;
 
+import org.apache.doris.catalog.Catalog;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.Table;
+import org.apache.doris.common.DdlException;
+
+import com.google.common.collect.Maps;
+
 import java.util.List;
+import java.util.Map;
 
 /*
 A statistics task that directly collects statistics by reading FE meta.
  */
 public class MetaStatisticsTask extends StatisticsTask {
 
-    public MetaStatisticsTask(long jobId, StatsGranularityDesc granularityDesc,
-                              StatsCategoryDesc categoryDesc, List<StatsType> statsTypeList) {
+    public MetaStatisticsTask(long jobId,
+                              StatsGranularityDesc granularityDesc,
+                              StatsCategoryDesc categoryDesc,
+                              List<StatsType> statsTypeList) {
         super(jobId, granularityDesc, categoryDesc, statsTypeList);
     }
 
     @Override
     public StatisticsTaskResult call() throws Exception {
-        // TODO
-        return null;
+        Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
+        List<StatsType> statsTypeList = this.getStatsTypeList();
+
+        for (StatsType statsType : statsTypeList) {
+            switch (statsType) {
+                case ROW_COUNT:
+                    getRowCount(statsType, statsTypeToValue);
+                    break;
+                case DATA_SIZE:
+                    getDataSize(statsType, statsTypeToValue);
+                    break;
+                case MAX_SIZE:
+                case AVG_SIZE:

Review Comment:
   Will this be changed in the next step to collect the correct info by executing a task on the BE?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org