You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by wa...@apache.org on 2022/05/06 11:16:43 UTC

[incubator-doris] branch master updated: [refactor]refactor lazy materialized (#8834)

This is an automated email from the ASF dual-hosted git repository.

wangbo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a9831f87f2 [refactor]refactor lazy materialized (#8834)
a9831f87f2 is described below

commit a9831f87f25aa1e5b2c55cfce141c510b10e2a42
Author: wangbo <wa...@apache.org>
AuthorDate: Fri May 6 19:16:35 2022 +0800

    [refactor]refactor lazy materialized (#8834)
    
    [refactor]refactor lazy materialized (#8834)
---
 be/src/exec/olap_scan_node.cpp                     |   2 +-
 be/src/exec/olap_scan_node.h                       |   2 +-
 be/src/exec/olap_scanner.cpp                       |   2 +-
 be/src/olap/column_predicate.h                     |   2 +-
 be/src/olap/null_predicate.cpp                     |   2 +-
 be/src/olap/olap_common.h                          |   2 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 227 ++++++++++++---------
 be/src/olap/rowset/segment_v2/segment_iterator.h   |   3 +-
 .../storage/test_dup_tab_basic_int_nullable.groovy |   4 +
 .../test_dup_tab_basic_varchar_nullable.groovy     |   5 +
 .../storage/test_dup_tab_char_nullable.groovy      |   2 +
 .../storage/test_dup_tab_date_nullable.groovy      |   4 +
 .../storage/test_dup_tab_datetime_nullable.groovy  |   4 +
 .../storage/test_dup_tab_decimal_nullable.groovy   |   4 +
 .../test_dup_tab_mixed_type_nullable.groovy        |   3 +
 15 files changed, 165 insertions(+), 103 deletions(-)

diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index 61befd7eda..c0ec6f1150 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -126,7 +126,7 @@ void OlapScanNode::_init_counter(RuntimeState* state) {
     _rows_vec_cond_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT);
     _vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime");
     _short_cond_timer = ADD_TIMER(_segment_profile, "ShortPredEvalTime");
-    _pred_col_read_timer = ADD_TIMER(_segment_profile, "PredColumnReadTime");
+    _first_read_timer = ADD_TIMER(_segment_profile, "FirstReadTime");
     _lazy_read_timer = ADD_TIMER(_segment_profile, "LazyReadTime");
     _output_col_timer = ADD_TIMER(_segment_profile, "OutputColumnTime");
 
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index 688c4517a3..6313101b2b 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -290,7 +290,7 @@ protected:
     RuntimeProfile::Counter* _rows_vec_cond_counter = nullptr;
     RuntimeProfile::Counter* _vec_cond_timer = nullptr;
     RuntimeProfile::Counter* _short_cond_timer = nullptr;
-    RuntimeProfile::Counter* _pred_col_read_timer = nullptr;
+    RuntimeProfile::Counter* _first_read_timer = nullptr;
     RuntimeProfile::Counter* _lazy_read_timer = nullptr;
     RuntimeProfile::Counter* _output_col_timer = nullptr;
 
diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 2478d2b2bf..40029414b8 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -545,7 +545,7 @@ void OlapScanner::update_counter() {
     // COUNTER_UPDATE(_parent->_filtered_rows_counter, stats.num_rows_filtered);
     COUNTER_UPDATE(_parent->_vec_cond_timer, stats.vec_cond_ns);
     COUNTER_UPDATE(_parent->_short_cond_timer, stats.short_cond_ns);
-    COUNTER_UPDATE(_parent->_pred_col_read_timer, stats.pred_col_read_ns);
+    COUNTER_UPDATE(_parent->_first_read_timer, stats.first_read_ns);
     COUNTER_UPDATE(_parent->_lazy_read_timer, stats.lazy_read_ns);
     COUNTER_UPDATE(_parent->_output_col_timer, stats.output_col_ns);
     COUNTER_UPDATE(_parent->_rows_vec_cond_counter, stats.rows_vec_cond_filtered);
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 7ebb4604f0..7aa3218314 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -43,7 +43,7 @@ enum class PredicateType {
     IN_LIST = 7,
     NOT_IN_LIST = 8,
     IS_NULL = 9,
-    NOT_IS_NULL = 10,
+    IS_NOT_NULL = 10,
     BF = 11, // BloomFilter
 };
 
diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp
index 43cfbcaab3..1210e1f439 100644
--- a/be/src/olap/null_predicate.cpp
+++ b/be/src/olap/null_predicate.cpp
@@ -30,7 +30,7 @@ NullPredicate::NullPredicate(uint32_t column_id, bool is_null, bool opposite)
         : ColumnPredicate(column_id), _is_null(opposite != is_null) {}
 
 PredicateType NullPredicate::type() const {
-    return _is_null ? PredicateType::IS_NULL : PredicateType::NOT_IS_NULL;
+    return _is_null ? PredicateType::IS_NULL : PredicateType::IS_NOT_NULL;
 }
 
 void NullPredicate::evaluate(VectorizedRowBatch* batch) const {
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 3afa23f983..c8256d0f2e 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -265,7 +265,7 @@ struct OlapReaderStatistics {
     int64_t rows_vec_del_cond_filtered = 0;
     int64_t vec_cond_ns = 0;
     int64_t short_cond_ns = 0;
-    int64_t pred_col_read_ns = 0;
+    int64_t first_read_ns = 0;
     int64_t lazy_read_ns = 0;
     int64_t output_col_ns = 0;
 
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 0fd4a16ee4..ec2093e2f8 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -523,7 +523,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
     // phase 1: read rows selected by various index (indicated by _row_bitmap) into block
     // when using lazy-materialization-read, only columns with predicates are read
     {
-        SCOPED_RAW_TIMER(&_opts.stats->pred_col_read_ns);
+        SCOPED_RAW_TIMER(&_opts.stats->first_read_ns);
         do {
             uint32_t range_from;
             uint32_t range_to;
@@ -607,22 +607,45 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
 
 /* ---------------------- for vecterization implementation  ---------------------- */
 
+/**
+ *  A storage layer data type can be measured from two perspectives:
+ *  1 Whether the type can be read in a fast way (batch read using SIMD).
+ *    Types such as integer and float can be read in a SIMD way.
+ *    Types such as string/bitmap/hll cannot be read in batches, so reading this kind of data is slow.
+ *   If a type can be read fast, we can try to eliminate Lazy Materialization, because we think that for this type, seek cost > read cost.
+ *   This is an estimate; if we want a more precise cost, statistics collection is necessary (this is a todo).
+ *   In short, when the returned non-pred columns contain string/hll/bitmap, we use Lazy Materialization.
+ *   Otherwise, we disable it.
+ *    
+ *   When Lazy Materialization is enabled, we need to read columns at least two times:
+ *   the first time to read the pred columns, the second time to read the non-pred columns.
+ *   Here's an interesting question to research: whether reading the pred columns once is the best plan
+ *   (why not read the pred columns twice or more?).
+ *
+ *   When Lazy Materialization is disabled, we just need to read once.
+ *   
+ * 
+ *  2 Whether the predicate type can be evaluated in a fast way (using SIMD to eval pred).
+ *    Types such as integer and float can be evaluated fast,
+ *    but BloomFilter/string/date are slow to evaluate.
+ *    If a type can be evaluated fast, we use vectorization to evaluate it.
+ *    Otherwise, we use short-circuit evaluation.
+ * 
+ *  
+ */
+
 // todo(wb) need a UT here
 void SegmentIterator::_vec_init_lazy_materialization() {
     _is_pred_column.resize(_schema.columns().size(), false);
 
-    // including short_cir_pred_col_id_set and vec_pred_col_id_set
+    // including short/vec/delete pred
     std::set<ColumnId> pred_column_ids;
-    _is_all_column_basic_type = true;
-    bool is_predicate_column_exists = false;
-    bool is_non_predicate_column_exists = false;
+    _lazy_materialization_read = false;
 
     std::set<ColumnId> del_cond_id_set;
     _opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set);
 
     if (!_col_predicates.empty() || !del_cond_id_set.empty()) {
-        is_predicate_column_exists = true;
-
         std::set<ColumnId> short_cir_pred_col_id_set; // using set for distinct cid
         std::set<ColumnId> vec_pred_col_id_set;
 
@@ -632,13 +655,16 @@ void SegmentIterator::_vec_init_lazy_materialization() {
             _is_pred_column[cid] = true;
             pred_column_ids.insert(cid);
 
+            // Step1: check pred using short eval or vec eval
             if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR ||
                 type == OLAP_FIELD_TYPE_STRING || predicate->type() == PredicateType::BF ||
                 predicate->type() == PredicateType::IN_LIST ||
-                predicate->type() == PredicateType::NOT_IN_LIST) {
+                predicate->type() == PredicateType::NOT_IN_LIST ||
+                predicate->type() == PredicateType::IS_NULL ||
+                predicate->type() == PredicateType::IS_NOT_NULL || type == OLAP_FIELD_TYPE_DATE ||
+                type == OLAP_FIELD_TYPE_DECIMAL) {
                 short_cir_pred_col_id_set.insert(cid);
                 _short_cir_eval_predicate.push_back(predicate);
-                _is_all_column_basic_type = false;
             } else {
                 vec_pred_col_id_set.insert(predicate->column_id());
                 if (_pre_eval_block_predicate == nullptr) {
@@ -653,75 +679,84 @@ void SegmentIterator::_vec_init_lazy_materialization() {
         if (!del_cond_id_set.empty()) {
             short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end());
             pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end());
-            _is_all_column_basic_type = false;
 
             for (auto cid : del_cond_id_set) {
                 _is_pred_column[cid] = true;
             }
         }
 
-        if (_schema.column_ids().size() > pred_column_ids.size()) {
-            for (auto cid : _schema.column_ids()) {
-                if (!_is_pred_column[cid]) {
-                    _non_predicate_columns.push_back(cid);
-                    is_non_predicate_column_exists = true;
-
-                    // todo(wb) make a cost-based lazy-materialization framework
-                    // check non-pred column type to decide whether using lazy-materialization
-                    FieldType type = _schema.column(cid)->type();
-                    if (_is_all_column_basic_type &&
-                        (type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT ||
-                         type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR ||
-                         type == OLAP_FIELD_TYPE_STRING)) {
-                        _is_all_column_basic_type = false;
-                    }
-                }
-            }
-        }
-
         _vec_pred_column_ids.assign(vec_pred_col_id_set.cbegin(), vec_pred_col_id_set.cend());
         _short_cir_pred_column_ids.assign(short_cir_pred_col_id_set.cbegin(),
                                           short_cir_pred_col_id_set.cend());
-    } else {
-        _is_all_column_basic_type = false;
-        is_non_predicate_column_exists = true;
+    }
+
+    if (!_vec_pred_column_ids.empty()) {
+        _is_need_vec_eval = true;
+    }
+    if (!_short_cir_pred_column_ids.empty()) {
+        _is_need_short_eval = true;
+    }
+
+    // Step 2: check non-predicate read costs to determine whether lazy materialization is needed,
+    // and fill _non_predicate_columns.
+    // note(wb) The block schema may differ between the query layer and the storage layer:
+    //   the query layer block schema does not contain the delete column, but the storage layer appends the delete column to the end of the block schema.
+    //   When outputting a block to the query layer, the delete column can be skipped.
+    //  _schema.column_ids() stands for the storage layer block schema, so it contains the delete column id;
+    //  we just regard the delete column as a common pred column here.
+    if (_schema.column_ids().size() > pred_column_ids.size()) {
         for (auto cid : _schema.column_ids()) {
-            _non_predicate_columns.push_back(cid);
+            if (!_is_pred_column[cid]) {
+                _non_predicate_columns.push_back(cid);
+                FieldType type = _schema.column(cid)->type();
+
+                // todo(wb) maybe we can make read char type faster
+                // todo(wb) support map/array type
+                // todo(wb) consider multiple integer columns cost, such as 1000 columns, maybe lazy materialization faster
+                if (!_lazy_materialization_read &&
+                    (_is_need_vec_eval ||
+                     _is_need_short_eval) && // only when pred exists, we need to consider lazy materialization
+                    (type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT ||
+                     type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR ||
+                     type == OLAP_FIELD_TYPE_STRING || type == OLAP_FIELD_TYPE_BOOL ||
+                     type == OLAP_FIELD_TYPE_DATE || type == OLAP_FIELD_TYPE_DATETIME ||
+                     type == OLAP_FIELD_TYPE_DECIMAL)) {
+                    _lazy_materialization_read = true;
+                }
+            }
         }
     }
 
-    // note(wb) in following cases we disable lazy materialization
-    // case 1: when all column is basic type(is_all_column_basic_type = true)
-    //   because we think `seek and read` cost > read page cost, lazy materialize may cause more `seek and read`, so disable it
-    // case 2: all column is predicate column
-    // case 3: all column is not predicate column
-    // todo(wb) need further research more lazy materialization rule, such as get more info from `statistics` for better decision
-    if (_is_all_column_basic_type) {
-        std::set<ColumnId> pred_set(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end());
+    // Step 3: fill column ids for read and output
+    if (_lazy_materialization_read) {
+        // insert pred cid to first_read_columns
+        for (auto cid : pred_column_ids) {
+            _first_read_column_ids.push_back(cid);
+        }
+    } else if (!_is_need_vec_eval &&
+               !_is_need_short_eval) { // no pred exists, just read and output column
+        for (int i = 0; i < _schema.num_column_ids(); i++) {
+            auto cid = _schema.column_id(i);
+            _first_read_column_ids.push_back(cid);
+        }
+    } else { // pred exists, but we can eliminate lazy materialization
+        // insert pred/non-pred cid to first read columns
+        std::set<ColumnId> pred_id_set;
+        pred_id_set.insert(_short_cir_pred_column_ids.begin(), _short_cir_pred_column_ids.end());
+        pred_id_set.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end());
         std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(),
                                         _non_predicate_columns.end());
 
-        // when _is_all_column_basic_type = true, _first_read_column_ids should keep the same order with _schema.column_ids which stands for return column order
         for (int i = 0; i < _schema.num_column_ids(); i++) {
             auto cid = _schema.column_id(i);
-            if (pred_set.find(cid) != pred_set.end()) {
+            if (pred_id_set.find(cid) != pred_id_set.end()) {
                 _first_read_column_ids.push_back(cid);
             } else if (non_pred_set.find(cid) != non_pred_set.end()) {
                 _first_read_column_ids.push_back(cid);
-                // in this case, non-predicate column should also be filtered by sel idx, so we regard it as pred columns
+                // when _lazy_materialization_read = false, non-predicate column should also be filtered by sel idx, so we regard it as pred columns
                 _is_pred_column[cid] = true;
             }
         }
-
-    } else if (is_predicate_column_exists && !is_non_predicate_column_exists) {
-        _first_read_column_ids.assign(pred_column_ids.cbegin(), pred_column_ids.cend());
-    } else if (!is_predicate_column_exists && is_non_predicate_column_exists) {
-        for (auto cid : _non_predicate_columns) {
-            _first_read_column_ids.push_back(cid);
-        }
-    } else {
-        _lazy_materialization_read = true;
-        _first_read_column_ids.assign(pred_column_ids.cbegin(), pred_column_ids.cend());
     }
 
     // make _schema_block_id_map
@@ -787,7 +822,7 @@ void SegmentIterator::_output_non_pred_columns(vectorized::Block* block) {
 
 Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32_t& nrows_read,
                                                bool set_block_rowid) {
-    SCOPED_RAW_TIMER(&_opts.stats->pred_col_read_ns);
+    SCOPED_RAW_TIMER(&_opts.stats->first_read_ns);
     do {
         uint32_t range_from;
         uint32_t range_to;
@@ -818,7 +853,7 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
 void SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx,
                                                         uint16_t& selected_size) {
     SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns);
-    if (_vec_pred_column_ids.empty()) {
+    if (!_is_need_vec_eval) {
         for (uint32_t i = 0; i < selected_size; ++i) {
             sel_rowid_idx[i] = i;
         }
@@ -859,7 +894,7 @@ void SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx,
 void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_idx,
                                                         uint16_t* selected_size_ptr) {
     SCOPED_RAW_TIMER(&_opts.stats->short_cond_ns);
-    if (_short_cir_pred_column_ids.empty()) {
+    if (!_is_need_short_eval) {
         return;
     }
 
@@ -911,17 +946,19 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
     if (UNLIKELY(!_inited)) {
         RETURN_IF_ERROR(_init(true));
         _inited = true;
-        if (!_vec_pred_column_ids.empty() || !_short_cir_pred_column_ids.empty()) {
+        if (_lazy_materialization_read) {
             _block_rowids.resize(_opts.block_row_max);
         }
         _current_return_columns.resize(_schema.columns().size());
-        for (size_t i = 0; i < _schema.num_column_ids(); i++) {
-            auto cid = _schema.column_id(i);
-            if (_is_pred_column[cid]) {
-                auto column_desc = _schema.column(cid);
-                _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(
-                        column_desc->type(), column_desc->is_nullable());
-                _current_return_columns[cid]->reserve(_opts.block_row_max);
+        if (_is_need_vec_eval || _is_need_short_eval) {
+            for (size_t i = 0; i < _schema.num_column_ids(); i++) {
+                auto cid = _schema.column_id(i);
+                if (_is_pred_column[cid]) {
+                    auto column_desc = _schema.column(cid);
+                    _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(
+                            column_desc->type(), column_desc->is_nullable());
+                    _current_return_columns[cid]->reserve(_opts.block_row_max);
+                }
             }
         }
     }
@@ -947,48 +984,46 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
         return Status::EndOfFile("no more data in segment");
     }
 
-    // when no predicate(include delete condition) is provided, output column directly
-    if (_vec_pred_column_ids.empty() && _short_cir_pred_column_ids.empty()) {
+    if (!_is_need_vec_eval && !_is_need_short_eval) {
         _output_non_pred_columns(block);
-    } else { // need predicate evaluation
+    } else {
         uint16_t selected_size = nrows_read;
         uint16_t sel_rowid_idx[selected_size];
 
         // step 1: evaluate vectorization predicate
         _evaluate_vectorization_predicate(sel_rowid_idx, selected_size);
 
-        // When predicate column and no-predicate column are both basic type, lazy materialization is eliminate
-        // So output block directly after vectorization evaluation
-        if (_is_all_column_basic_type) {
-            RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
-                                                      selected_size));
-        } else {
-            // step 2: evaluate short ciruit predicate
-            // todo(wb) research whether need to read short predicate after vectorization evaluation
-            //          to reduce cost of read short circuit columns.
-            //          In SSB test, it make no difference; So need more scenarios to test
-            _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size);
-
-            // step3: read non_predicate column
-            if (!_non_predicate_columns.empty()) {
-                _read_columns_by_rowids(_non_predicate_columns, _block_rowids, sel_rowid_idx,
-                                        selected_size, &_current_return_columns);
+        // step 2: evaluate short circuit predicate
+        // todo(wb) research whether we need to read short circuit predicate columns after vectorization evaluation
+        //          to reduce the cost of reading short circuit columns.
+        //          In the SSB test, it makes no difference; so more scenarios need to be tested
+        _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size);
+
+        if (!_lazy_materialization_read) {
+            Status ret = _output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
+                                                   selected_size);
+            if (!ret.ok()) {
+                return ret;
             }
+            // shrink char_type suffix zero data
+            block->shrink_char_type_column_suffix_zero(_char_type_idx);
+            return ret;
+        }
 
-            // step4: output columns
-            // 4.1 output non-predicate column
-            _output_non_pred_columns(block);
+        // step3: read non_predicate column
+        _read_columns_by_rowids(_non_predicate_columns, _block_rowids, sel_rowid_idx, selected_size,
+                                &_current_return_columns);
 
-            // 4.2 get union of short_cir_pred and vec_pred
-            std::set<ColumnId> pred_column_ids;
-            pred_column_ids.insert(_short_cir_pred_column_ids.begin(),
-                                   _short_cir_pred_column_ids.end());
-            pred_column_ids.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end());
+        // step4: output columns
+        // 4.1 output non-predicate column
+        _output_non_pred_columns(block);
 
-            // 4.3 output short circuit and predicate column
-            RETURN_IF_ERROR(_output_column_by_sel_idx(block, pred_column_ids, sel_rowid_idx,
-                                                      selected_size));
-        }
+        // 4.2 output short circuit and predicate columns
+        // when lazy materialization is enabled, _first_read_column_ids = distinct(_short_cir_pred_column_ids + _vec_pred_column_ids)
+        // see _vec_init_lazy_materialization
+        // todo(wb) need to distinguish input column ids from output column ids
+        RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
+                                                  selected_size));
     }
 
     // shrink char_type suffix zero data
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 489d19b2a2..0ce9975456 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -146,9 +146,10 @@ private:
     // remember the rowids we've read for the current row block.
     // could be a local variable of next_batch(), kept here to reuse vector memory
     std::vector<rowid_t> _block_rowids;
+    bool _is_need_vec_eval = false;
+    bool _is_need_short_eval = false;
 
     // fields for vectorization execution
-    bool _is_all_column_basic_type;
     std::vector<ColumnId>
             _vec_pred_column_ids; // keep columnId of columns for vectorized predicate evaluation
     std::vector<ColumnId>
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy
index 2a7f770efe..b3fb96ed1f 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy
@@ -38,6 +38,8 @@ PROPERTIES (
 )
 """
 
+    sql "set enable_vectorized_engine = false"
+
     sql """insert into ${table1} values
         (9,10,11,12),
         (9,10,11,12),
@@ -57,6 +59,8 @@ PROPERTIES (
         (5,6,7,8)
 """
 
+    sql "set enable_vectorized_engine = true"
+
     test {
         // siteid column not contain null
         sql "select siteid,citycode,userid,pv from ${table1} where siteid = 21 "
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy
index 6dbe34a61a..1ab4590957 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy
@@ -37,6 +37,9 @@ suite("test_dup_tab_basic_varchar_nullable") {
     "storage_format" = "V2"
     )
     """
+
+    sql "set enable_vectorized_engine = false"
+
     sql """insert into ${table1} values(null,'qie3','yy','lj'),
         (null,'hehe',null,'lala'),
         ('beijing','xuanwu','wugui',null),
@@ -47,6 +50,8 @@ suite("test_dup_tab_basic_varchar_nullable") {
         ('tengxun2','qie',null,'lj')
 """
 
+    sql "set enable_vectorized_engine = true"
+
     // read single column
     test {
         sql "select city from ${table1} order by city"
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy
index e67c93d227..fbdb4b78be 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy
@@ -38,6 +38,7 @@ PROPERTIES (
 )
 
     """
+    sql "set enable_vectorized_engine = false"
 
     sql """insert into ${table1} values
        ('a1','a2','a3','a4'),
@@ -48,6 +49,7 @@ PROPERTIES (
        ('e1','e2','e3','e4'),
        (null,'e2',null,'e4')
 """
+    sql "set enable_vectorized_engine = true"
 
     qt_read_single_column_1 "select city from ${table1} where city in ('a1','e1')"
     qt_read_single_column_2 "select city from ${table1} where city not in ('a1','e1')"
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy
index 122cfe2717..c9613fb6dc 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy
@@ -39,6 +39,8 @@ PROPERTIES (
 
     """
 
+    sql "set enable_vectorized_engine = false"
+
     sql """insert into ${table1} values
         (1, '2021-04-01', '2021-04-02', '2021-04-03'),
         (1, '2021-03-01', '2021-03-02', '2021-03-03'),
@@ -47,6 +49,8 @@ PROPERTIES (
         (null, '2021-05-01', 'null', '2021-04-03')
 """
 
+    sql "set enable_vectorized_engine = true"
+
     qt_sql1 "select date1 from ${table1} order by date1"
 
     // read single column
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy
index f295b5cec0..4833faee2b 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy
@@ -39,6 +39,8 @@ PROPERTIES (
 
     """
 
+    sql "set enable_vectorized_engine = false"
+
     sql """insert into ${table1} values
         (1,'2021-01-01 23:10:01','2021-01-02 23:10:04','2021-01-02 22:10:04'),
         (2,'2021-02-01 23:10:01','2021-02-02 23:10:04','2021-03-02 22:10:04'),
@@ -48,6 +50,8 @@ PROPERTIES (
         (null,'2021-06-01 23:10:01',null,'2021-06-02 22:10:04')
 """
 
+    sql "set enable_vectorized_engine = true"
+
     qt_read_single_column_1 "select datetime1 from ${table1}"
     qt_read_single_column_2 "select siteid from ${table1}"
 
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy
index 2fa2eb2386..a858ef7ea1 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy
@@ -38,6 +38,8 @@ PROPERTIES (
 )
     """
 
+    sql "set enable_vectorized_engine = false"
+
     sql """insert into ${table1} values(1.1,1.2,1.3,1.4),
         (1.1,2.2,2.3,3.4),
         (2.1,2.2,2.3,2.4),
@@ -46,6 +48,8 @@ PROPERTIES (
         (null,2,null,4)
 """
 
+    sql "set enable_vectorized_engine = true"
+
     // query decimal
     test {
         sql "select siteid from ${table1} order by siteid"
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy
index 924ecc2c68..c729f8a62c 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy
@@ -42,6 +42,8 @@ PROPERTIES (
 )
 """
 
+    sql "set enable_vectorized_engine = false"
+
     sql """insert into ${table1} values(1,2,3.1,4.2,5.3,5.4,'a1','a2'),
             (2,3,4.1,5.2,6.3,7.4,'b1','b2'),
             (3,4,5.1,6.2,7.3,8.4,'c1','c2'),
@@ -50,6 +52,7 @@ PROPERTIES (
             (5,6,5.1,8.2,6.3,11.4,'e1','e2'),
             (null,7,null,8,null,9,null,'e3')
         """
+    sql "set enable_vectorized_engine = true"
 
     // read int and string
     test {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org