You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/05/17 06:01:27 UTC
[incubator-doris] branch master updated: [Bug][Vectorized] Fix BE crash with delete condition and enable_storage_vectorization (#9547)

This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1cc9653bd8 [Bug][Vectorized] Fix BE crash with delete condition and enable_storage_vectorization (#9547)
1cc9653bd8 is described below

commit 1cc9653bd85e0b1b4f92d428879b511b0f7dc878
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Tue May 17 14:01:22 2022 +0800

    [Bug][Vectorized] Fix BE crash with delete condition and enable_storage_vectorization (#9547)
    
    Co-authored-by: lihaopeng <li...@baidu.com>
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 41 +++++++++++++++-------
 regression-test/data/delete/test_delete.out        | 16 +++++++++
 regression-test/suites/delete/test_delete.groovy   | 17 +++++++--
 3 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index ec2093e2f8..c8dbc44891 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -797,7 +797,9 @@ void SegmentIterator::_init_current_block(
         auto cid = _schema.column_id(i);
         auto column_desc = _schema.column(cid);
 
-        if (_is_pred_column[cid]) { //todo(wb) maybe we can release it after output block
+        // the column in in block must clear() here to insert new data
+        if (_is_pred_column[cid] ||
+            i >= block->columns()) { //todo(wb) maybe we can release it after output block
             current_columns[cid]->clear();
         } else { // non-predicate column
             current_columns[cid] = std::move(*block->get_by_position(i).column).mutate();
@@ -815,8 +817,12 @@ void SegmentIterator::_init_current_block(
 void SegmentIterator::_output_non_pred_columns(vectorized::Block* block) {
     SCOPED_RAW_TIMER(&_opts.stats->output_col_ns);
     for (auto cid : _non_predicate_columns) {
-        block->replace_by_position(_schema_block_id_map[cid],
-                                   std::move(_current_return_columns[cid]));
+        auto loc = _schema_block_id_map[cid];
+        // if loc < block->block->columns() means the the column is delete column and should
+        // not output by block, so just skip the column.
+        if (loc < block->columns()) {
+            block->replace_by_position(loc, std::move(_current_return_columns[cid]));
+        }
     }
 }
 
@@ -950,15 +956,24 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
             _block_rowids.resize(_opts.block_row_max);
         }
         _current_return_columns.resize(_schema.columns().size());
-        if (_is_need_vec_eval || _is_need_short_eval) {
-            for (size_t i = 0; i < _schema.num_column_ids(); i++) {
-                auto cid = _schema.column_id(i);
-                if (_is_pred_column[cid]) {
-                    auto column_desc = _schema.column(cid);
-                    _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(
-                            column_desc->type(), column_desc->is_nullable());
-                    _current_return_columns[cid]->reserve(_opts.block_row_max);
-                }
+        for (size_t i = 0; i < _schema.num_column_ids(); i++) {
+            auto cid = _schema.column_id(i);
+            auto column_desc = _schema.column(cid);
+            if (_is_pred_column[cid]) {
+                _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(
+                        column_desc->type(), column_desc->is_nullable());
+                _current_return_columns[cid]->reserve(_opts.block_row_max);
+            } else if (i >= block->columns()) {
+                // if i >= block->columns means the column and not the pred_column means `column i` is
+                // a delete condition column. but the column is not effective in the segment. so we just
+                // create a column to hold the data.
+                // a. origin data -> b. delete condition -> c. new load data
+                // the segment of c do not effective delete condition, but it still need read the column
+                // to match the schema.
+                // TODO: skip read the not effective delete column to speed up segment read.
+                _current_return_columns[cid] =
+                        Schema::get_data_type_ptr(*column_desc)->create_column();
+                _current_return_columns[cid]->reserve(_opts.block_row_max);
             }
         }
     }
@@ -973,7 +988,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
     _opts.stats->raw_rows_read += nrows_read;
 
     if (nrows_read == 0) {
-        for (int i = 0; i < _schema.num_column_ids(); i++) {
+        for (int i = 0; i < block->columns(); i++) {
             auto cid = _schema.column_id(i);
             // todo(wb) abstract make column where
             if (!_is_pred_column[cid]) { // non-predicate
diff --git a/regression-test/data/delete/test_delete.out b/regression-test/data/delete/test_delete.out
new file mode 100644
index 0000000000..5dbc5223f6
--- /dev/null
+++ b/regression-test/data/delete/test_delete.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+8
+
+-- !sql --
+8
+
diff --git a/regression-test/suites/delete/test_delete.groovy b/regression-test/suites/delete/test_delete.groovy
index 7fd9bc6f4b..97dfab32bb 100644
--- a/regression-test/suites/delete/test_delete.groovy
+++ b/regression-test/suites/delete/test_delete.groovy
@@ -16,6 +16,19 @@
 // under the License.
 
 suite("test_delete", "delete") {
-    // todo: test delete
-    sql "show delete"
+    def tableName = "delete_regression_test"
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """ CREATE TABLE ${tableName} (c1 varchar(190) NOT NULL COMMENT "",c2 bigint(20) NOT NULL COMMENT "", c3 varchar(160) NULL COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(c1, c2) COMMENT "OLAP" DISTRIBUTED BY HASH(c3) BUCKETS 3 
+    PROPERTIES ( "replication_num" = "1" );"""
+
+    sql """INSERT INTO ${tableName} VALUES ('abcdef',1,'fjdsajfldjafljdslajfdl'),('abcdef',2,'fjdsajfldjafljdslajfdl'),('abcdef',4,'fjdsajfldjafljdslajfdl'),('abcdef',5,'fjdsajfldjafljdslajfdl')"""
+    sql """delete from ${tableName} where c1 = 'fjdsajfldjafljdslajfdl';"""
+    sql """INSERT INTO ${tableName} VALUES ('abcdef',1,'fjdsajfldjafljdslajfdl'),('abcdef',2,'fjdsajfldjafljdslajfdl'),('abcdef',4,'fjdsajfldjafljdslajfdl'),('abcdef',5,'fjdsajfldjafljdslajfdl');"""
+    
+    qt_sql """select count(*) from ${tableName};"""
+    qt_sql """select count(c2) from ${tableName};"""
+    qt_sql """select count(c2) from ${tableName} where c1 = 'abcdef';"""
+    qt_sql """select count(c1) from ${tableName};"""
+    qt_sql """select count(c1) from ${tableName} where c1 = 'abcdef';"""
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org