You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2024/04/03 07:45:27 UTC

(doris) branch branch-2.0 updated: [fix](inverted index) fix the incorrect result issue of COUNT_ON_INDEX for key columns #33164 (#33196)

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 819b9103d2e [fix](inverted index) fix the incorrect result issue of COUNT_ON_INDEX for key columns #33164 (#33196)
819b9103d2e is described below

commit 819b9103d2e3151a0572f169f0834044daf55398
Author: zzzxl <33...@users.noreply.github.com>
AuthorDate: Wed Apr 3 15:45:21 2024 +0800

    [fix](inverted index) fix the incorrect result issue of COUNT_ON_INDEX for key columns #33164 (#33196)
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 18 ++++--
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  5 +-
 .../data/inverted_index_p0/test_index_delete.out   | 16 ++++++
 .../inverted_index_p0/test_index_delete.groovy     | 65 ++++++++++++++++++++++
 4 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 80fb6f7856c..28311f316a1 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1051,7 +1051,7 @@ bool SegmentIterator::_need_read_data(ColumnId cid) {
         return true;
     }
     // if there is delete predicate, we always need to read data
-    if (_opts.delete_condition_predicates->num_of_column_predicate() > 0) {
+    if (_has_delete_predicate(cid)) {
         return true;
     }
     if (_output_columns.count(-1)) {
@@ -1787,7 +1787,7 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
 
     for (auto cid : _first_read_column_ids) {
         auto& column = _current_return_columns[cid];
-        if (_need_read_key_data(cid, column, nrows_read)) {
+        if (_no_need_read_key_data(cid, column, nrows_read)) {
             continue;
         }
         if (_prune_column(cid, column, true, nrows_read)) {
@@ -2463,8 +2463,8 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
     }
 }
 
-bool SegmentIterator::_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column,
-                                          size_t nrows_read) {
+bool SegmentIterator::_no_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column,
+                                             size_t nrows_read) {
     if (_opts.tablet_schema->keys_type() != KeysType::DUP_KEYS) {
         return false;
     }
@@ -2477,6 +2477,10 @@ bool SegmentIterator::_need_read_key_data(ColumnId cid, vectorized::MutableColum
         return false;
     }
 
+    if (_has_delete_predicate(cid)) {
+        return false;
+    }
+
     std::set<uint32_t> cids;
     for (auto* pred : _col_predicates) {
         cids.insert(pred->column_id());
@@ -2501,5 +2505,11 @@ bool SegmentIterator::_need_read_key_data(ColumnId cid, vectorized::MutableColum
     return true;
 }
 
+bool SegmentIterator::_has_delete_predicate(ColumnId cid) { // Returns true when `cid` is in the column-id set reported by the delete-condition predicates.
+    std::set<uint32_t> delete_columns_set; // local scratch set, rebuilt on every call
+    _opts.delete_condition_predicates->get_all_column_ids(delete_columns_set); // presumably fills the set with every column referenced by a delete predicate -- confirm against its definition
+    return delete_columns_set.contains(cid); // std::set::contains requires C++20
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index a282712da3c..5d32367f94d 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -322,7 +322,10 @@ private:
         return 0;
     }
 
-    bool _need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column, size_t nrows_read);
+    bool _no_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column,
+                                size_t nrows_read);
+
+    bool _has_delete_predicate(ColumnId cid);
 
     class BitmapRangeIterator;
     class BackwardBitmapRangeIterator;
diff --git a/regression-test/data/inverted_index_p0/test_index_delete.out b/regression-test/data/inverted_index_p0/test_index_delete.out
new file mode 100644
index 00000000000..e0f6bfb1311
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_delete.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+6
+
+-- !sql --
+0
+
diff --git a/regression-test/suites/inverted_index_p0/test_index_delete.groovy b/regression-test/suites/inverted_index_p0/test_index_delete.groovy
new file mode 100644
index 00000000000..a189884508b
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_delete.groovy
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_index_delete", "p0") { // Regression suite: count() on a DUPLICATE KEY table with an inverted index must reflect earlier DELETEs.
+    def indexTbName1 = "test_index_delete"
+
+    sql "DROP TABLE IF EXISTS ${indexTbName1}" // start from a clean table
+
+    sql """
+      CREATE TABLE ${indexTbName1} (
+        `a` int(11) NULL COMMENT "",
+        `b` text NULL COMMENT "",
+        INDEX b_idx (`b`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`a`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "disable_auto_compaction" = "true"
+      );
+    """
+
+    sql """ INSERT INTO ${indexTbName1} VALUES (1, "1"); """ // ten single-row inserts: a = 1..10, b in {"1","2","3","4"}
+    sql """ INSERT INTO ${indexTbName1} VALUES (2, "1"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (3, "1"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (4, "2"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (5, "2"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (6, "2"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (7, "3"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (8, "3"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (9, "3"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (10, "4"); """
+
+    try {
+        sql "sync"
+
+        sql """ delete from ${indexTbName1} where a >= 9; """ // delete on the key column: removes a = 9 and a = 10, leaving 8 rows
+        qt_sql """ select count() from ${indexTbName1} where a >= 1 and a <= 10; """ // expected 8
+        qt_sql """ select count() from ${indexTbName1} where a >= 1; """ // expected 8
+        qt_sql """ select count() from ${indexTbName1} where a <= 10; """ // expected 8
+        
+        sql """ delete from ${indexTbName1} where b = '3'; """ // delete on the indexed column: removes a = 7 and a = 8 (a = 9 is already gone), leaving 6 rows
+        qt_sql """ select count() from ${indexTbName1} where a >= 1; """ // expected 6
+        qt_sql """ select count() from ${indexTbName1} where b match '3'; """ // expected 0: every row with b = '3' has been deleted
+
+    } finally {
+        //try_sql("DROP TABLE IF EXISTS ${indexTbName1}") // cleanup is commented out, so the table is left in place after the run
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org