You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/06/09 16:18:00 UTC
[doris] 08/13: [enhancement](index) Nereids support no need to read raw data for index column that only in filter conditions (#20605)
This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4a33b956e620c969abbfccced21cf063ba3780e7
Author: YueW <45...@users.noreply.github.com>
AuthorDate: Fri Jun 9 21:54:48 2023 +0800
[enhancement](index) Nereids support no need to read raw data for index column that only in filter conditions (#20605)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 19 ++-
be/src/olap/rowset/segment_v2/segment_iterator.h | 1 +
be/src/vec/exec/scan/new_olap_scan_node.cpp | 3 -
.../org/apache/doris/planner/OriginalPlanner.java | 44 +------
.../test_index_no_need_read_data.out | 129 +++++++++++++++++++++
.../test_index_no_need_read_data.groovy | 88 ++++++++++++++
6 files changed, 241 insertions(+), 43 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 92717945b1..53d89e6395 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -250,6 +250,10 @@ Status SegmentIterator::init(const StorageReadOptions& opts) {
if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) {
_vec_init_char_column_id();
}
+
+ if (opts.output_columns != nullptr) {
+ _output_columns = *(opts.output_columns);
+ }
return Status::OK();
}
@@ -917,7 +921,20 @@ Status SegmentIterator::_apply_inverted_index_on_block_column_predicate(
}
bool SegmentIterator::_need_read_data(ColumnId cid) {
- // TODO(xk) impl right logic
+ if (_output_columns.count(-1)) {
+ // If _output_columns contains -1, light-weight schema change may
+ // not be enabled, or the column unique_id was not set for some
+ // other reason. To prevent errors, return true here so that the
+ // column data is always read.
+ return true;
+ }
+ int32_t unique_id = _opts.tablet_schema->column(cid).unique_id();
+ if (_need_read_data_indices.count(unique_id) > 0 && !_need_read_data_indices[unique_id] &&
+ _output_columns.count(unique_id) < 1) {
+ VLOG_DEBUG << "SegmentIterator no need read data for column: "
+ << _opts.tablet_schema->column_by_uid(unique_id).name();
+ return false;
+ }
return true;
}
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 899ee46e3a..93b8b398e7 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -431,6 +431,7 @@ private:
std::vector<ColumnPredicate*> _filter_info_id;
bool _record_rowids = false;
int32_t _tablet_id = 0;
+ std::set<int32_t> _output_columns;
};
} // namespace segment_v2
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index ac916bfcd1..740a57e793 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -440,9 +440,6 @@ Status NewOlapScanNode::_init_scanners(std::list<VScannerSPtr>* scanners) {
if (!_olap_scan_node.output_column_unique_ids.empty()) {
for (auto uid : _olap_scan_node.output_column_unique_ids) {
- if (uid < 0) {
- continue;
- }
_maybe_read_column_ids.emplace(uid);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java
index 024ea0647b..1fbd140d45 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java
@@ -572,50 +572,16 @@ public class OriginalPlanner extends Planner {
* column unique id for `A` and `B` will put into outputColumnUniqueIds.
*
*/
+ // This optimization only takes effect with nereidsPlanner.
private void pushOutColumnUniqueIdsToOlapScan(PlanFragment rootFragment, Analyzer analyzer) {
Set<Integer> outputColumnUniqueIds = new HashSet<>();
- ArrayList<Expr> outputExprs = rootFragment.getOutputExprs();
- for (Expr expr : outputExprs) {
- if (expr instanceof SlotRef) {
- if (((SlotRef) expr).getColumn() != null) {
- outputColumnUniqueIds.add(((SlotRef) expr).getColumn().getUniqueId());
- }
- }
- }
+ // Add '-1' so that this optimization is never applied with OriginalPlanner:
+ // the storage layer skips the optimization when outputColumnUniqueIds contains '-1',
+ // which ensures the optimization only takes effect with nereidsPlanner.
+ outputColumnUniqueIds.add(-1);
for (PlanFragment fragment : fragments) {
PlanNode node = fragment.getPlanRoot();
- PlanNode parent = null;
- while (node.getChildren().size() != 0) {
- for (PlanNode childNode : node.getChildren()) {
- List<SlotId> outputSlotIds = childNode.getOutputSlotIds();
- if (outputSlotIds != null) {
- for (SlotId sid : outputSlotIds) {
- SlotDescriptor slotDesc = analyzer.getSlotDesc(sid);
- outputColumnUniqueIds.add(slotDesc.getUniqueId());
- }
- }
- }
- // OlapScanNode is the last node.
- // So, just get the two node and check if they are SortNode and OlapScan.
- parent = node;
- node = node.getChildren().get(0);
- }
-
- if (parent instanceof SortNode) {
- SortNode sortNode = (SortNode) parent;
- List<Expr> orderingExprs = sortNode.getSortInfo().getOrigOrderingExprs();
- if (orderingExprs != null) {
- for (Expr expr : orderingExprs) {
- if (expr instanceof SlotRef) {
- if (((SlotRef) expr).getColumn() != null) {
- outputColumnUniqueIds.add(((SlotRef) expr).getColumn().getUniqueId());
- }
- }
- }
- }
- }
-
if (!(node instanceof OlapScanNode)) {
continue;
}
diff --git a/regression-test/data/inverted_index_p0/test_index_no_need_read_data.out b/regression-test/data/inverted_index_p0/test_index_no_need_read_data.out
new file mode 100644
index 0000000000..01a08f324d
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_no_need_read_data.out
@@ -0,0 +1,129 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_nereids_0 --
+1 \N addr qie3 yy lj 100
+2 \N hehe \N lala 200
+3 beijing addr xuanwu wugui \N 300
+4 beijing addr fengtai fengtai1 fengtai2 \N
+5 beijing addr chaoyang wangjing donghuqu 500
+6 shanghai hehe \N haha \N
+7 tengxun qie addr gg lj \N
+8 tengxun2 qie \N lj 800
+
+-- !select_nereids_1 --
+4
+
+-- !select_nereids_2 --
+3
+
+-- !select_nereids_3 --
+3
+
+-- !select_nereids_4 --
+3 beijing addr xuanwu wugui \N 300
+4 beijing addr fengtai fengtai1 fengtai2 \N
+5 beijing addr chaoyang wangjing donghuqu 500
+
+-- !select_nereids_5 --
+beijing addr xuanwu wugui
+beijing addr fengtai fengtai1
+beijing addr chaoyang wangjing
+
+-- !select_nereids_6 --
+hehe \N
+qie addr gg
+qie \N
+
+-- !select_nereids_7 --
+hehe \N
+qie addr gg
+qie \N
+
+-- !select_nereids_8 --
+SHANGHAI \N
+TENGXUN addr gg
+TENGXUN2 \N
+
+-- !select_nereids_9 --
+4 \N
+3 addr gg
+3 \N
+
+-- !select_nereids_10 --
+hehe \N
+qie addr gg
+qie \N
+
+-- !select_nereids_11 --
+hehe \N SHANGHAI
+qie addr gg TENGXUN
+qie \N TENGXUN2
+
+-- !select_nereids_12 --
+300
+\N
+500
+
+-- !select_0 --
+1 \N addr qie3 yy lj 100
+2 \N hehe \N lala 200
+3 beijing addr xuanwu wugui \N 300
+4 beijing addr fengtai fengtai1 fengtai2 \N
+5 beijing addr chaoyang wangjing donghuqu 500
+6 shanghai hehe \N haha \N
+7 tengxun qie addr gg lj \N
+8 tengxun2 qie \N lj 800
+
+-- !select_1 --
+4
+
+-- !select_2 --
+3
+
+-- !select_3 --
+3
+
+-- !select_4 --
+3 beijing addr xuanwu wugui \N 300
+4 beijing addr fengtai fengtai1 fengtai2 \N
+5 beijing addr chaoyang wangjing donghuqu 500
+
+-- !select_5 --
+beijing addr xuanwu wugui
+beijing addr fengtai fengtai1
+beijing addr chaoyang wangjing
+
+-- !select_6 --
+hehe \N
+qie addr gg
+qie \N
+
+-- !select_7 --
+hehe \N
+qie addr gg
+qie \N
+
+-- !select_8 --
+SHANGHAI \N
+TENGXUN addr gg
+TENGXUN2 \N
+
+-- !select_9 --
+4 \N
+3 addr gg
+3 \N
+
+-- !select_10 --
+hehe \N
+qie addr gg
+qie \N
+
+-- !select_11 --
+hehe \N SHANGHAI
+qie addr gg TENGXUN
+qie \N TENGXUN2
+
+-- !select_12 --
+300
+\N
+500
+
diff --git a/regression-test/suites/inverted_index_p0/test_index_no_need_read_data.groovy b/regression-test/suites/inverted_index_p0/test_index_no_need_read_data.groovy
new file mode 100644
index 0000000000..e5bc37e2fd
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_no_need_read_data.groovy
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_index_no_need_read_data", "inverted_index_select"){
+ def table1 = "test_index_no_need_read_data"
+
+ sql "drop table if exists ${table1}"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS `${table1}` (
+ `id` int NULL COMMENT "",
+ `city` varchar(20) NULL COMMENT "",
+ `addr` varchar(20) NULL COMMENT "",
+ `name` varchar(20) NULL COMMENT "",
+ `compy` varchar(20) NULL COMMENT "",
+ `n` int NULL COMMENT "",
+ INDEX idx_city(city) USING INVERTED,
+ INDEX idx_addr(addr) USING INVERTED PROPERTIES("parser"="english"),
+ INDEX idx_n(n) USING INVERTED
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2"
+ )
+ """
+
+ sql """insert into ${table1} values
+ (1,null,'addr qie3','yy','lj',100),
+ (2,null,'hehe',null,'lala',200),
+ (3,'beijing','addr xuanwu','wugui',null,300),
+ (4,'beijing','addr fengtai','fengtai1','fengtai2',null),
+ (5,'beijing','addr chaoyang','wangjing','donghuqu',500),
+ (6,'shanghai','hehe',null,'haha',null),
+ (7,'tengxun','qie','addr gg','lj',null),
+ (8,'tengxun2','qie',null,'lj',800)
+ """
+
+ // case1: enable nereids planner
+ sql "set enable_nereids_planner = true"
+
+ qt_select_nereids_0 "SELECT * FROM ${table1} ORDER BY id"
+ qt_select_nereids_1 "SELECT count() FROM ${table1} WHERE n > 100"
+ qt_select_nereids_2 "SELECT count() FROM ${table1} WHERE city = 'beijing'"
+ qt_select_nereids_3 "SELECT count(*) FROM ${table1} WHERE city = 'beijing'"
+ qt_select_nereids_4 "SELECT * FROM ${table1} WHERE city = 'beijing' ORDER BY id"
+ qt_select_nereids_5 "SELECT city, addr, name FROM ${table1} WHERE city = 'beijing' ORDER BY id"
+ qt_select_nereids_6 "SELECT addr, name FROM ${table1} WHERE city > 'beijing' ORDER BY city"
+ qt_select_nereids_7 "SELECT addr, name FROM ${table1} WHERE city > 'beijing' ORDER BY id"
+ qt_select_nereids_8 "SELECT upper(city), name FROM ${table1} WHERE city != 'beijing' ORDER BY id"
+ qt_select_nereids_9 "SELECT length(addr), name FROM ${table1} WHERE city != 'beijing' ORDER BY id"
+ qt_select_nereids_10 "SELECT addr, name FROM ( SELECT * from ${table1} WHERE city != 'beijing' ORDER BY id) t"
+ qt_select_nereids_11 "SELECT addr, name, upper(city) FROM ( SELECT * from ${table1} WHERE city != 'beijing' ORDER BY id) t"
+ qt_select_nereids_12 "SELECT sum(n) FROM ${table1} WHERE city = 'beijing' group by id ORDER BY id"
+
+ // case2: disable nereids planner
+ sql "set enable_nereids_planner = false"
+
+ qt_select_0 "SELECT * FROM ${table1} ORDER BY id"
+ qt_select_1 "SELECT count() FROM ${table1} WHERE n > 100"
+ qt_select_2 "SELECT count() FROM ${table1} WHERE city = 'beijing'"
+ qt_select_3 "SELECT count(*) FROM ${table1} WHERE city = 'beijing'"
+ qt_select_4 "SELECT * FROM ${table1} WHERE city = 'beijing' ORDER BY id"
+ qt_select_5 "SELECT city, addr, name FROM ${table1} WHERE city = 'beijing' ORDER BY id"
+ qt_select_6 "SELECT addr, name FROM ${table1} WHERE city > 'beijing' ORDER BY city"
+ qt_select_7 "SELECT addr, name FROM ${table1} WHERE city > 'beijing' ORDER BY id"
+ qt_select_8 "SELECT upper(city), name FROM ${table1} WHERE city != 'beijing' ORDER BY id"
+ qt_select_9 "SELECT length(addr), name FROM ${table1} WHERE city != 'beijing' ORDER BY id"
+ qt_select_10 "SELECT addr, name FROM ( SELECT * from ${table1} WHERE city != 'beijing' ORDER BY id) t"
+ qt_select_11 "SELECT addr, name, upper(city) FROM ( SELECT * from ${table1} WHERE city != 'beijing' ORDER BY id) t"
+ qt_select_12 "SELECT sum(n) FROM ${table1} WHERE city = 'beijing' group by id ORDER BY id"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org