You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2021/04/09 06:13:29 UTC

[incubator-doris] branch master updated: [Bug] Fix bug that isPreAggregation is incorrectly set (#5608)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c7d8d2  [Bug] Fix bug that isPreAggregation is incorrectly set (#5608)
9c7d8d2 is described below

commit 9c7d8d2e989b5e706e8f940b34e672bfba5d4158
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Fri Apr 9 14:13:06 2021 +0800

    [Bug] Fix bug that isPreAggregation is incorrectly set (#5608)
    
    1. The MaterializedViewSelector should be reset for each scan node
    2. On the BE side, columns with delete conditions must be added to the return column.
---
 be/src/exec/olap_scanner.cpp                       |  1 +
 be/src/olap/reader.cpp                             |  3 ++-
 .../doris/planner/MaterializedViewSelector.java    | 21 ++++++++++++----
 .../org/apache/doris/planner/OlapScanNode.java     | 28 +++++++++++++++-------
 4 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 8d9b8a4..197febb 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -172,6 +172,7 @@ Status OlapScanner::_init_params(const std::vector<OlapScanRange*>& key_ranges,
     if (_aggregation || single_version) {
         _params.return_columns = _return_columns;
     } else {
+        // we need to fetch all key columns to do the right aggregation on storage engine side.
         for (size_t i = 0; i < _tablet->num_key_columns(); ++i) {
             _params.return_columns.push_back(i);
         }
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index cb2c6e2..39081ee 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -458,7 +458,8 @@ OLAPStatus Reader::_init_params(const ReaderParams& read_params) {
 OLAPStatus Reader::_init_return_columns(const ReaderParams& read_params) {
     if (read_params.reader_type == READER_QUERY) {
         _return_columns = read_params.return_columns;
-        if (!_delete_handler.empty() && read_params.aggregation) {
+        if (!_delete_handler.empty()) {
+            // We need to fetch columns which there are deletion conditions on them.
             set<uint32_t> column_set(_return_columns.begin(), _return_columns.end());
             for (const auto& conds : _delete_handler.get_delete_conditions()) {
                 for (const auto& cond_column : conds.del_cond->columns()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java b/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java
index 13a63d4..ef30f91 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/MaterializedViewSelector.java
@@ -83,8 +83,14 @@ public class MaterializedViewSelector {
     private Map<Long, Set<String>> columnNamesInQueryOutput = Maps.newHashMap();
 
     private boolean disableSPJGView;
-    private String reasonOfDisable;
+
+    // The Following 2 variables should be reset each time before calling selectBestMV();
+    // Unlike the "isPreAggregation" in OlapScanNode which defaults to false,
+    // it defaults to true here. It is because in this class, we started to choose MV under the premise
+    // that the default base tables are duplicate key tables. For the aggregation key table,
+    // this variable will be set to false compensatively at the end.
     private boolean isPreAggregation = true;
+    private String reasonOfDisable;
 
     public MaterializedViewSelector(SelectStmt selectStmt, Analyzer analyzer) {
         this.selectStmt = selectStmt;
@@ -105,6 +111,7 @@ public class MaterializedViewSelector {
      * @return
      */
     public BestIndexInfo selectBestMV(ScanNode scanNode) throws UserException {
+        resetPreAggregationVariables();
         long start = System.currentTimeMillis();
         Preconditions.checkState(scanNode instanceof OlapScanNode);
         OlapScanNode olapScanNode = (OlapScanNode) scanNode;
@@ -113,11 +120,18 @@ public class MaterializedViewSelector {
             return null;
         }
         long bestIndexId = priorities(olapScanNode, candidateIndexIdToSchema);
-        LOG.debug("The best materialized view is {} for scan node {} in query {}, cost {}",
-                 bestIndexId, scanNode.getId(), selectStmt.toSql(), (System.currentTimeMillis() - start));
+        LOG.debug("The best materialized view is {} for scan node {} in query {}, " +
+                        "isPreAggregation: {}, reasonOfDisable: {}, cost {}",
+                bestIndexId, scanNode.getId(), selectStmt.toSql(), isPreAggregation, reasonOfDisable,
+                (System.currentTimeMillis() - start));
         return new BestIndexInfo(bestIndexId, isPreAggregation, reasonOfDisable);
     }
 
+    private void resetPreAggregationVariables() {
+        isPreAggregation = true;
+        reasonOfDisable = null;
+    }
+
     private Map<Long, List<Column>> predicates(OlapScanNode scanNode) throws AnalysisException {
         // Step1: all of predicates is compensating predicates
         Map<Long, MaterializedIndexMeta> candidateIndexIdToMeta = scanNode.getOlapTable().getVisibleIndexIdToMeta();
@@ -448,7 +462,6 @@ public class MaterializedViewSelector {
         for (TupleId tupleId : tupleIds) {
             TupleDescriptor tupleDescriptor = analyzer.getTupleDesc(tupleId);
             tupleDescriptor.getTableIdToColumnNames(columnNamesInQueryOutput);
-
         }
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index b2c4a61..4d260ae 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -65,9 +65,6 @@ import org.apache.doris.thrift.TScanRange;
 import org.apache.doris.thrift.TScanRangeLocation;
 import org.apache.doris.thrift.TScanRangeLocations;
 
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
 import com.google.common.base.Joiner;
 import com.google.common.base.MoreObjects;
 import com.google.common.base.Preconditions;
@@ -76,6 +73,9 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Range;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -106,6 +106,19 @@ public class OlapScanNode extends ScanNode {
      * Query2: select k1, min(v1) from table group by k1
      * This aggregation function in query is min which different from the schema.
      * So the data stored in storage engine need to be merged firstly before returning to scan node.
+     *
+     * There are currently two places to modify this variable:
+     * 1. The turnOffPreAgg() method of SingleNodePlanner.
+     *      This method will only be called on the left deepest OlapScanNode the plan tree,
+     *      while other nodes are false by default (because the Aggregation operation is executed after Join,
+     *      we cannot judge whether other OlapScanNodes can close the pre-aggregation).
+     *      So even the Duplicate key table, if it is not the left deepest node, it will remain false too.
+     *
+     * 2. After MaterializedViewSelector selects the materialized view, the updateScanRangeInfoByNewMVSelector()\
+     *    method of OlapScanNode may be called to update this variable.
+     *      This call will be executed on all ScanNodes in the plan tree. In this step,
+     *      for the DuplicateKey table, the variable will be set to true.
+     *      See comment of "isPreAggregation" variable in MaterializedViewSelector for details.
      */
     private boolean isPreAggregation = false;
     private String reasonOfPreAggregation = null;
@@ -227,8 +240,7 @@ public class OlapScanNode extends ScanNode {
 
         if (update) {
             this.selectedIndexId = selectedIndexId;
-            this.isPreAggregation = isPreAggregation;
-            this.reasonOfPreAggregation = reasonOfDisable;
+            setIsPreAggregation(isPreAggregation, reasonOfDisable);
             updateColumnType();
             LOG.info("Using the new scan range info instead of the old one. {}, {}", situation ,scanRangeInfo);
         } else {
@@ -630,11 +642,11 @@ public class OlapScanNode extends ScanNode {
         }
         msg.node_type = TPlanNodeType.OLAP_SCAN_NODE;
         msg.olap_scan_node =
-              new TOlapScanNode(desc.getId().asInt(), keyColumnNames, keyColumnTypes, isPreAggregation);
+                new TOlapScanNode(desc.getId().asInt(), keyColumnNames, keyColumnTypes, isPreAggregation);
         if (null != sortColumn) {
             msg.olap_scan_node.setSortColumn(sortColumn);
         }
-	msg.olap_scan_node.setKeyType(olapTable.getKeysType().toThrift());
+        msg.olap_scan_node.setKeyType(olapTable.getKeysType().toThrift());
     }
 
     // export some tablets
@@ -647,7 +659,7 @@ public class OlapScanNode extends ScanNode {
         olapScanNode.selectedPartitionNum = 1;
         olapScanNode.selectedTabletsNum = 1;
         olapScanNode.totalTabletsNum = 1;
-        olapScanNode.isPreAggregation = false;
+        olapScanNode.setIsPreAggregation(false, "Export job");
         olapScanNode.isFinalized = true;
         olapScanNode.result.addAll(locationsList);
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org