You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by "morningman (via GitHub)" <gi...@apache.org> on 2023/06/06 14:07:39 UTC

[GitHub] [doris] morningman commented on a diff in pull request #20415: [Improvement](statistics)Support external table partition statistics.

morningman commented on code in PR #20415:
URL: https://github.com/apache/doris/pull/20415#discussion_r1219717424


##########
fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java:
##########
@@ -345,6 +346,13 @@ public Partition getPartition(List<String> partitionValues) {
         return client.getPartition(dbName, name, partitionValues);
     }
 
+    @Override
+    public Set<String> getPartitionNames() {
+        PooledHiveMetaStoreClient client = ((HMSExternalCatalog) catalog).getClient();

Review Comment:
   Call `makeSureInitialized()` before getting the client.



##########
fe/fe-core/src/main/cup/sql_parser.cup:
##########
@@ -2522,6 +2522,16 @@ opt_col_list ::=
     :}
     ;
 
+opt_partition_list ::=

Review Comment:
   We already have an entry `partition_names ::=`; consider reusing it instead of adding a new one.



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java:
##########
@@ -308,6 +308,10 @@ private Map<String, Set<String>> validateAndGetPartitions(TableIf table, Set<Str
             return columnToPartitions;
         }
 
+        if (table instanceof HMSExternalTable) {

Review Comment:
   Add a comment to explain this early return.



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java:
##########
@@ -541,6 +551,9 @@ private void createTaskForExternalTable(AnalysisInfo jobInfo,
         AnalysisInfo analysisInfo = colTaskInfoBuilder.setIndexId(-1L)
                 .setTaskId(taskId).setExternalTableLevelTask(true).build();
         analysisTasks.put(taskId, createTask(analysisInfo));
+        if (isSync) {

Review Comment:
   Add a comment to explain this early return.



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java:
##########
@@ -109,66 +117,140 @@ public HiveAnalysisTask(AnalysisInfo info) {
      */
     @Override
     protected void getStatsBySql() throws Exception {
-        getTableStatsBySql();
-        getPartitionStatsBySql();
-        getTableColumnStatsBySql();
-        getPartitionColumnStatsBySql();
+        if (isTableLevelTask) {
+            getTableStatsBySql();
+        } else {
+            getTableColumnStatsBySql();
+        }
     }
 
     /**
      * Get table row count and insert the result to __internal_schema.table_statistics
      */
     private void getTableStatsBySql() throws Exception {
-        Map<String, String> params = buildTableStatsParams();
-        List<InternalQueryResult.ResultRow> columnResult =
-                StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
-                .replace(ANALYZE_TABLE_COUNT_TEMPLATE));
-        String rowCount = columnResult.get(0).getColumnValue("rowCount");
-        params.put("rowCount", rowCount);
-        StatisticsRepository.persistTableStats(params);
+        // Get table level information.

Review Comment:
   Add an example of the final SQL in a comment.



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java:
##########
@@ -109,66 +117,140 @@ public HiveAnalysisTask(AnalysisInfo info) {
      */
     @Override
     protected void getStatsBySql() throws Exception {
-        getTableStatsBySql();
-        getPartitionStatsBySql();
-        getTableColumnStatsBySql();
-        getPartitionColumnStatsBySql();
+        if (isTableLevelTask) {
+            getTableStatsBySql();
+        } else {
+            getTableColumnStatsBySql();
+        }
     }
 
     /**
      * Get table row count and insert the result to __internal_schema.table_statistics
      */
     private void getTableStatsBySql() throws Exception {
-        Map<String, String> params = buildTableStatsParams();
-        List<InternalQueryResult.ResultRow> columnResult =
-                StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
-                .replace(ANALYZE_TABLE_COUNT_TEMPLATE));
-        String rowCount = columnResult.get(0).getColumnValue("rowCount");
-        params.put("rowCount", rowCount);
-        StatisticsRepository.persistTableStats(params);
+        // Get table level information.
+        Map<String, String> parameters = table.getRemoteTable().getParameters();
+        if (isPartitionOnly) {
+            for (String partId : partitionNames) {
+                StringBuilder sb = new StringBuilder();
+                sb.append(ANALYZE_TABLE_COUNT_TEMPLATE);
+                sb.append(" where ");
+                String[] splits = partId.split("/");
+                for (int i = 0; i < splits.length; i++) {
+                    String value = splits[i].split("=")[1];
+                    splits[i] = splits[i].replace(value, "\'" + value + "\'");
+                }
+                sb.append(StringUtils.join(splits, " and "));
+                Map<String, String> params = buildTableStatsParams(partId);
+                setParameterData(parameters, params);
+                List<InternalQueryResult.ResultRow> columnResult =
+                        StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
+                        .replace(sb.toString()));
+                String rowCount = columnResult.get(0).getColumnValue("rowCount");
+                params.put("rowCount", rowCount);
+                StatisticsRepository.persistTableStats(params);
+            }
+        } else {
+            Map<String, String> params = buildTableStatsParams("NULL");
+            List<InternalQueryResult.ResultRow> columnResult =
+                    StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
+                    .replace(ANALYZE_TABLE_COUNT_TEMPLATE));
+            String rowCount = columnResult.get(0).getColumnValue("rowCount");
+            params.put("rowCount", rowCount);
+            StatisticsRepository.persistTableStats(params);
+        }
     }
 
     /**
      * Get column statistics and insert the result to __internal_schema.column_statistics
      */
     private void getTableColumnStatsBySql() throws Exception {
-        Map<String, String> params = buildTableStatsParams();
-        params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
-        params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME);
-        params.put("colName", col.getName());
-        params.put("colId", info.colName);
-        params.put("dataSizeFunction", getDataSizeFunction(col));
-        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
-        String sql = stringSubstitutor.replace(ANALYZE_SQL_TABLE_TEMPLATE);
-        try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) {
-            r.connectContext.getSessionVariable().disableNereidsPlannerOnce();
-            this.stmtExecutor = new StmtExecutor(r.connectContext, sql);
-            this.stmtExecutor.execute();
+        if (isPartitionOnly) {

Review Comment:
   Add an example of the final SQL in a comment.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org