You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by wa...@apache.org on 2022/05/06 11:16:43 UTC
[incubator-doris] branch master updated: [refactor]refactor lazy materialized (#8834)
This is an automated email from the ASF dual-hosted git repository.
wangbo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new a9831f87f2 [refactor]refactor lazy materialized (#8834)
a9831f87f2 is described below
commit a9831f87f25aa1e5b2c55cfce141c510b10e2a42
Author: wangbo <wa...@apache.org>
AuthorDate: Fri May 6 19:16:35 2022 +0800
[refactor]refactor lazy materialized (#8834)
[refactor]refactor lazy materialized (#8834)
---
be/src/exec/olap_scan_node.cpp | 2 +-
be/src/exec/olap_scan_node.h | 2 +-
be/src/exec/olap_scanner.cpp | 2 +-
be/src/olap/column_predicate.h | 2 +-
be/src/olap/null_predicate.cpp | 2 +-
be/src/olap/olap_common.h | 2 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 227 ++++++++++++---------
be/src/olap/rowset/segment_v2/segment_iterator.h | 3 +-
.../storage/test_dup_tab_basic_int_nullable.groovy | 4 +
.../test_dup_tab_basic_varchar_nullable.groovy | 5 +
.../storage/test_dup_tab_char_nullable.groovy | 2 +
.../storage/test_dup_tab_date_nullable.groovy | 4 +
.../storage/test_dup_tab_datetime_nullable.groovy | 4 +
.../storage/test_dup_tab_decimal_nullable.groovy | 4 +
.../test_dup_tab_mixed_type_nullable.groovy | 3 +
15 files changed, 165 insertions(+), 103 deletions(-)
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index 61befd7eda..c0ec6f1150 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -126,7 +126,7 @@ void OlapScanNode::_init_counter(RuntimeState* state) {
_rows_vec_cond_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT);
_vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime");
_short_cond_timer = ADD_TIMER(_segment_profile, "ShortPredEvalTime");
- _pred_col_read_timer = ADD_TIMER(_segment_profile, "PredColumnReadTime");
+ _first_read_timer = ADD_TIMER(_segment_profile, "FirstReadTime");
_lazy_read_timer = ADD_TIMER(_segment_profile, "LazyReadTime");
_output_col_timer = ADD_TIMER(_segment_profile, "OutputColumnTime");
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index 688c4517a3..6313101b2b 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -290,7 +290,7 @@ protected:
RuntimeProfile::Counter* _rows_vec_cond_counter = nullptr;
RuntimeProfile::Counter* _vec_cond_timer = nullptr;
RuntimeProfile::Counter* _short_cond_timer = nullptr;
- RuntimeProfile::Counter* _pred_col_read_timer = nullptr;
+ RuntimeProfile::Counter* _first_read_timer = nullptr;
RuntimeProfile::Counter* _lazy_read_timer = nullptr;
RuntimeProfile::Counter* _output_col_timer = nullptr;
diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 2478d2b2bf..40029414b8 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -545,7 +545,7 @@ void OlapScanner::update_counter() {
// COUNTER_UPDATE(_parent->_filtered_rows_counter, stats.num_rows_filtered);
COUNTER_UPDATE(_parent->_vec_cond_timer, stats.vec_cond_ns);
COUNTER_UPDATE(_parent->_short_cond_timer, stats.short_cond_ns);
- COUNTER_UPDATE(_parent->_pred_col_read_timer, stats.pred_col_read_ns);
+ COUNTER_UPDATE(_parent->_first_read_timer, stats.first_read_ns);
COUNTER_UPDATE(_parent->_lazy_read_timer, stats.lazy_read_ns);
COUNTER_UPDATE(_parent->_output_col_timer, stats.output_col_ns);
COUNTER_UPDATE(_parent->_rows_vec_cond_counter, stats.rows_vec_cond_filtered);
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 7ebb4604f0..7aa3218314 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -43,7 +43,7 @@ enum class PredicateType {
IN_LIST = 7,
NOT_IN_LIST = 8,
IS_NULL = 9,
- NOT_IS_NULL = 10,
+ IS_NOT_NULL = 10,
BF = 11, // BloomFilter
};
diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp
index 43cfbcaab3..1210e1f439 100644
--- a/be/src/olap/null_predicate.cpp
+++ b/be/src/olap/null_predicate.cpp
@@ -30,7 +30,7 @@ NullPredicate::NullPredicate(uint32_t column_id, bool is_null, bool opposite)
: ColumnPredicate(column_id), _is_null(opposite != is_null) {}
PredicateType NullPredicate::type() const {
- return _is_null ? PredicateType::IS_NULL : PredicateType::NOT_IS_NULL;
+ return _is_null ? PredicateType::IS_NULL : PredicateType::IS_NOT_NULL;
}
void NullPredicate::evaluate(VectorizedRowBatch* batch) const {
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 3afa23f983..c8256d0f2e 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -265,7 +265,7 @@ struct OlapReaderStatistics {
int64_t rows_vec_del_cond_filtered = 0;
int64_t vec_cond_ns = 0;
int64_t short_cond_ns = 0;
- int64_t pred_col_read_ns = 0;
+ int64_t first_read_ns = 0;
int64_t lazy_read_ns = 0;
int64_t output_col_ns = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 0fd4a16ee4..ec2093e2f8 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -523,7 +523,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
// phase 1: read rows selected by various index (indicated by _row_bitmap) into block
// when using lazy-materialization-read, only columns with predicates are read
{
- SCOPED_RAW_TIMER(&_opts.stats->pred_col_read_ns);
+ SCOPED_RAW_TIMER(&_opts.stats->first_read_ns);
do {
uint32_t range_from;
uint32_t range_to;
@@ -607,22 +607,45 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
/* ---------------------- for vecterization implementation ---------------------- */
+/**
+ * A storage-layer data type can be assessed from two perspectives:
+ * 1 Whether the type can be read in a fast way (batch read using SIMD).
+ * Types such as integer and float can be read in a SIMD way.
+ * Types such as string/bitmap/hll cannot be read in batches, so reading data of these types is slow.
+ * If a type can be read fast, we can try to eliminate Lazy Materialization, because we think that for this type, seek cost > read cost.
+ * This is an estimate; if we want a more precise cost, statistics collection is necessary (this is a todo).
+ * In short, when the returned non-pred columns contain string/hll/bitmap, we use Lazy Materialization.
+ * Otherwise, we disable it.
+ *
+ * When Lazy Materialization is enabled, we need to read columns at least two times:
+ * the first time to read the pred columns, the second time to read the non-pred columns.
+ * Here is an interesting question to research: whether reading the pred columns once is the best plan.
+ * (Why not read the pred columns twice or more?)
+ *
+ * When Lazy Materialization is disabled, we just need to read once.
+ *
+ *
+ * 2 Whether the predicate type can be evaluated in a fast way (using SIMD to eval pred).
+ * Types such as integer and float can be evaluated fast.
+ * But BloomFilter/string/date predicates are slow to evaluate.
+ * If a type can be evaluated fast, we use vectorization to evaluate it.
+ * Otherwise, we use short-circuit evaluation.
+ *
+ *
+ */
+
// todo(wb) need a UT here
void SegmentIterator::_vec_init_lazy_materialization() {
_is_pred_column.resize(_schema.columns().size(), false);
- // including short_cir_pred_col_id_set and vec_pred_col_id_set
+ // including short/vec/delete pred
std::set<ColumnId> pred_column_ids;
- _is_all_column_basic_type = true;
- bool is_predicate_column_exists = false;
- bool is_non_predicate_column_exists = false;
+ _lazy_materialization_read = false;
std::set<ColumnId> del_cond_id_set;
_opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set);
if (!_col_predicates.empty() || !del_cond_id_set.empty()) {
- is_predicate_column_exists = true;
-
std::set<ColumnId> short_cir_pred_col_id_set; // using set for distinct cid
std::set<ColumnId> vec_pred_col_id_set;
@@ -632,13 +655,16 @@ void SegmentIterator::_vec_init_lazy_materialization() {
_is_pred_column[cid] = true;
pred_column_ids.insert(cid);
+ // Step1: check pred using short eval or vec eval
if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR ||
type == OLAP_FIELD_TYPE_STRING || predicate->type() == PredicateType::BF ||
predicate->type() == PredicateType::IN_LIST ||
- predicate->type() == PredicateType::NOT_IN_LIST) {
+ predicate->type() == PredicateType::NOT_IN_LIST ||
+ predicate->type() == PredicateType::IS_NULL ||
+ predicate->type() == PredicateType::IS_NOT_NULL || type == OLAP_FIELD_TYPE_DATE ||
+ type == OLAP_FIELD_TYPE_DECIMAL) {
short_cir_pred_col_id_set.insert(cid);
_short_cir_eval_predicate.push_back(predicate);
- _is_all_column_basic_type = false;
} else {
vec_pred_col_id_set.insert(predicate->column_id());
if (_pre_eval_block_predicate == nullptr) {
@@ -653,75 +679,84 @@ void SegmentIterator::_vec_init_lazy_materialization() {
if (!del_cond_id_set.empty()) {
short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end());
pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end());
- _is_all_column_basic_type = false;
for (auto cid : del_cond_id_set) {
_is_pred_column[cid] = true;
}
}
- if (_schema.column_ids().size() > pred_column_ids.size()) {
- for (auto cid : _schema.column_ids()) {
- if (!_is_pred_column[cid]) {
- _non_predicate_columns.push_back(cid);
- is_non_predicate_column_exists = true;
-
- // todo(wb) make a cost-based lazy-materialization framework
- // check non-pred column type to decide whether using lazy-materialization
- FieldType type = _schema.column(cid)->type();
- if (_is_all_column_basic_type &&
- (type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT ||
- type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR ||
- type == OLAP_FIELD_TYPE_STRING)) {
- _is_all_column_basic_type = false;
- }
- }
- }
- }
-
_vec_pred_column_ids.assign(vec_pred_col_id_set.cbegin(), vec_pred_col_id_set.cend());
_short_cir_pred_column_ids.assign(short_cir_pred_col_id_set.cbegin(),
short_cir_pred_col_id_set.cend());
- } else {
- _is_all_column_basic_type = false;
- is_non_predicate_column_exists = true;
+ }
+
+ if (!_vec_pred_column_ids.empty()) {
+ _is_need_vec_eval = true;
+ }
+ if (!_short_cir_pred_column_ids.empty()) {
+ _is_need_short_eval = true;
+ }
+
+ // Step 2: check non-predicate read costs to determine whether lazy materialization is needed,
+ // and fill _non_predicate_columns.
+ // note(wb) The block schema may differ between the query layer and the storage layer:
+ // the query layer block schema does not contain the delete column, but the storage layer appends the delete column to the end of the block schema.
+ // When outputting the block to the query layer, the delete column can be skipped.
+ // _schema.column_ids() stands for the storage layer block schema, so it contains the delete column id;
+ // we just regard the delete column as a common pred column here.
+ if (_schema.column_ids().size() > pred_column_ids.size()) {
for (auto cid : _schema.column_ids()) {
- _non_predicate_columns.push_back(cid);
+ if (!_is_pred_column[cid]) {
+ _non_predicate_columns.push_back(cid);
+ FieldType type = _schema.column(cid)->type();
+
+ // todo(wb) maybe we can make read char type faster
+ // todo(wb) support map/array type
+ // todo(wb) consider multiple integer columns cost, such as 1000 columns, maybe lazy materialization faster
+ if (!_lazy_materialization_read &&
+ (_is_need_vec_eval ||
+ _is_need_short_eval) && // only when pred exists, we need to consider lazy materialization
+ (type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT ||
+ type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR ||
+ type == OLAP_FIELD_TYPE_STRING || type == OLAP_FIELD_TYPE_BOOL ||
+ type == OLAP_FIELD_TYPE_DATE || type == OLAP_FIELD_TYPE_DATETIME ||
+ type == OLAP_FIELD_TYPE_DECIMAL)) {
+ _lazy_materialization_read = true;
+ }
+ }
}
}
- // note(wb) in following cases we disable lazy materialization
- // case 1: when all column is basic type(is_all_column_basic_type = true)
- // because we think `seek and read` cost > read page cost, lazy materialize may cause more `seek and read`, so disable it
- // case 2: all column is predicate column
- // case 3: all column is not predicate column
- // todo(wb) need further research more lazy materialization rule, such as get more info from `statistics` for better decision
- if (_is_all_column_basic_type) {
- std::set<ColumnId> pred_set(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end());
+ // Step 3: fill column ids for read and output
+ if (_lazy_materialization_read) {
+ // insert pred cid to first_read_columns
+ for (auto cid : pred_column_ids) {
+ _first_read_column_ids.push_back(cid);
+ }
+ } else if (!_is_need_vec_eval &&
+ !_is_need_short_eval) { // no pred exists, just read and output column
+ for (int i = 0; i < _schema.num_column_ids(); i++) {
+ auto cid = _schema.column_id(i);
+ _first_read_column_ids.push_back(cid);
+ }
+ } else { // pred exits, but we can eliminate lazy materialization
+ // insert pred/non-pred cid to first read columns
+ std::set<ColumnId> pred_id_set;
+ pred_id_set.insert(_short_cir_pred_column_ids.begin(), _short_cir_pred_column_ids.end());
+ pred_id_set.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end());
std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(),
_non_predicate_columns.end());
- // when _is_all_column_basic_type = true, _first_read_column_ids should keep the same order with _schema.column_ids which stands for return column order
for (int i = 0; i < _schema.num_column_ids(); i++) {
auto cid = _schema.column_id(i);
- if (pred_set.find(cid) != pred_set.end()) {
+ if (pred_id_set.find(cid) != pred_id_set.end()) {
_first_read_column_ids.push_back(cid);
} else if (non_pred_set.find(cid) != non_pred_set.end()) {
_first_read_column_ids.push_back(cid);
- // in this case, non-predicate column should also be filtered by sel idx, so we regard it as pred columns
+ // when _lazy_materialization_read = false, non-predicate column should also be filtered by sel idx, so we regard it as pred columns
_is_pred_column[cid] = true;
}
}
-
- } else if (is_predicate_column_exists && !is_non_predicate_column_exists) {
- _first_read_column_ids.assign(pred_column_ids.cbegin(), pred_column_ids.cend());
- } else if (!is_predicate_column_exists && is_non_predicate_column_exists) {
- for (auto cid : _non_predicate_columns) {
- _first_read_column_ids.push_back(cid);
- }
- } else {
- _lazy_materialization_read = true;
- _first_read_column_ids.assign(pred_column_ids.cbegin(), pred_column_ids.cend());
}
// make _schema_block_id_map
@@ -787,7 +822,7 @@ void SegmentIterator::_output_non_pred_columns(vectorized::Block* block) {
Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32_t& nrows_read,
bool set_block_rowid) {
- SCOPED_RAW_TIMER(&_opts.stats->pred_col_read_ns);
+ SCOPED_RAW_TIMER(&_opts.stats->first_read_ns);
do {
uint32_t range_from;
uint32_t range_to;
@@ -818,7 +853,7 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
void SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx,
uint16_t& selected_size) {
SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns);
- if (_vec_pred_column_ids.empty()) {
+ if (!_is_need_vec_eval) {
for (uint32_t i = 0; i < selected_size; ++i) {
sel_rowid_idx[i] = i;
}
@@ -859,7 +894,7 @@ void SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx,
void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_idx,
uint16_t* selected_size_ptr) {
SCOPED_RAW_TIMER(&_opts.stats->short_cond_ns);
- if (_short_cir_pred_column_ids.empty()) {
+ if (!_is_need_short_eval) {
return;
}
@@ -911,17 +946,19 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
if (UNLIKELY(!_inited)) {
RETURN_IF_ERROR(_init(true));
_inited = true;
- if (!_vec_pred_column_ids.empty() || !_short_cir_pred_column_ids.empty()) {
+ if (_lazy_materialization_read) {
_block_rowids.resize(_opts.block_row_max);
}
_current_return_columns.resize(_schema.columns().size());
- for (size_t i = 0; i < _schema.num_column_ids(); i++) {
- auto cid = _schema.column_id(i);
- if (_is_pred_column[cid]) {
- auto column_desc = _schema.column(cid);
- _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(
- column_desc->type(), column_desc->is_nullable());
- _current_return_columns[cid]->reserve(_opts.block_row_max);
+ if (_is_need_vec_eval || _is_need_short_eval) {
+ for (size_t i = 0; i < _schema.num_column_ids(); i++) {
+ auto cid = _schema.column_id(i);
+ if (_is_pred_column[cid]) {
+ auto column_desc = _schema.column(cid);
+ _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(
+ column_desc->type(), column_desc->is_nullable());
+ _current_return_columns[cid]->reserve(_opts.block_row_max);
+ }
}
}
}
@@ -947,48 +984,46 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
return Status::EndOfFile("no more data in segment");
}
- // when no predicate(include delete condition) is provided, output column directly
- if (_vec_pred_column_ids.empty() && _short_cir_pred_column_ids.empty()) {
+ if (!_is_need_vec_eval && !_is_need_short_eval) {
_output_non_pred_columns(block);
- } else { // need predicate evaluation
+ } else {
uint16_t selected_size = nrows_read;
uint16_t sel_rowid_idx[selected_size];
// step 1: evaluate vectorization predicate
_evaluate_vectorization_predicate(sel_rowid_idx, selected_size);
- // When predicate column and no-predicate column are both basic type, lazy materialization is eliminate
- // So output block directly after vectorization evaluation
- if (_is_all_column_basic_type) {
- RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
- selected_size));
- } else {
- // step 2: evaluate short ciruit predicate
- // todo(wb) research whether need to read short predicate after vectorization evaluation
- // to reduce cost of read short circuit columns.
- // In SSB test, it make no difference; So need more scenarios to test
- _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size);
-
- // step3: read non_predicate column
- if (!_non_predicate_columns.empty()) {
- _read_columns_by_rowids(_non_predicate_columns, _block_rowids, sel_rowid_idx,
- selected_size, &_current_return_columns);
+ // step 2: evaluate short circuit predicate
+ // todo(wb) research whether we need to read short circuit predicate columns after vectorization evaluation
+ // to reduce the cost of reading short circuit columns.
+ // In the SSB test it makes no difference, so more scenarios are needed to test this.
+ _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size);
+
+ if (!_lazy_materialization_read) {
+ Status ret = _output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
+ selected_size);
+ if (!ret.ok()) {
+ return ret;
}
+ // shrink char_type suffix zero data
+ block->shrink_char_type_column_suffix_zero(_char_type_idx);
+ return ret;
+ }
- // step4: output columns
- // 4.1 output non-predicate column
- _output_non_pred_columns(block);
+ // step3: read non_predicate column
+ _read_columns_by_rowids(_non_predicate_columns, _block_rowids, sel_rowid_idx, selected_size,
+ &_current_return_columns);
- // 4.2 get union of short_cir_pred and vec_pred
- std::set<ColumnId> pred_column_ids;
- pred_column_ids.insert(_short_cir_pred_column_ids.begin(),
- _short_cir_pred_column_ids.end());
- pred_column_ids.insert(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end());
+ // step4: output columns
+ // 4.1 output non-predicate column
+ _output_non_pred_columns(block);
- // 4.3 output short circuit and predicate column
- RETURN_IF_ERROR(_output_column_by_sel_idx(block, pred_column_ids, sel_rowid_idx,
- selected_size));
- }
+ // 4.3 output short circuit and predicate columns
+ // when lazy materialization is enabled, _first_read_column_ids = distinct(_short_cir_pred_column_ids + _vec_pred_column_ids)
+ // see _vec_init_lazy_materialization
+ // todo(wb) need to distinguish input column ids from output column ids
+ RETURN_IF_ERROR(_output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx,
+ selected_size));
}
// shrink char_type suffix zero data
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 489d19b2a2..0ce9975456 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -146,9 +146,10 @@ private:
// remember the rowids we've read for the current row block.
// could be a local variable of next_batch(), kept here to reuse vector memory
std::vector<rowid_t> _block_rowids;
+ bool _is_need_vec_eval = false;
+ bool _is_need_short_eval = false;
// fields for vectorization execution
- bool _is_all_column_basic_type;
std::vector<ColumnId>
_vec_pred_column_ids; // keep columnId of columns for vectorized predicate evaluation
std::vector<ColumnId>
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy
index 2a7f770efe..b3fb96ed1f 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_int_nullable.groovy
@@ -38,6 +38,8 @@ PROPERTIES (
)
"""
+ sql "set enable_vectorized_engine = false"
+
sql """insert into ${table1} values
(9,10,11,12),
(9,10,11,12),
@@ -57,6 +59,8 @@ PROPERTIES (
(5,6,7,8)
"""
+ sql "set enable_vectorized_engine = true"
+
test {
// siteid column not contain null
sql "select siteid,citycode,userid,pv from ${table1} where siteid = 21 "
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy
index 6dbe34a61a..1ab4590957 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_basic_varchar_nullable.groovy
@@ -37,6 +37,9 @@ suite("test_dup_tab_basic_varchar_nullable") {
"storage_format" = "V2"
)
"""
+
+ sql "set enable_vectorized_engine = false"
+
sql """insert into ${table1} values(null,'qie3','yy','lj'),
(null,'hehe',null,'lala'),
('beijing','xuanwu','wugui',null),
@@ -47,6 +50,8 @@ suite("test_dup_tab_basic_varchar_nullable") {
('tengxun2','qie',null,'lj')
"""
+ sql "set enable_vectorized_engine = true"
+
// read single column
test {
sql "select city from ${table1} order by city"
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy
index e67c93d227..fbdb4b78be 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_char_nullable.groovy
@@ -38,6 +38,7 @@ PROPERTIES (
)
"""
+ sql "set enable_vectorized_engine = false"
sql """insert into ${table1} values
('a1','a2','a3','a4'),
@@ -48,6 +49,7 @@ PROPERTIES (
('e1','e2','e3','e4'),
(null,'e2',null,'e4')
"""
+ sql "set enable_vectorized_engine = true"
qt_read_single_column_1 "select city from ${table1} where city in ('a1','e1')"
qt_read_single_column_2 "select city from ${table1} where city not in ('a1','e1')"
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy
index 122cfe2717..c9613fb6dc 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_date_nullable.groovy
@@ -39,6 +39,8 @@ PROPERTIES (
"""
+ sql "set enable_vectorized_engine = false"
+
sql """insert into ${table1} values
(1, '2021-04-01', '2021-04-02', '2021-04-03'),
(1, '2021-03-01', '2021-03-02', '2021-03-03'),
@@ -47,6 +49,8 @@ PROPERTIES (
(null, '2021-05-01', 'null', '2021-04-03')
"""
+ sql "set enable_vectorized_engine = true"
+
qt_sql1 "select date1 from ${table1} order by date1"
// read single column
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy
index f295b5cec0..4833faee2b 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_datetime_nullable.groovy
@@ -39,6 +39,8 @@ PROPERTIES (
"""
+ sql "set enable_vectorized_engine = false"
+
sql """insert into ${table1} values
(1,'2021-01-01 23:10:01','2021-01-02 23:10:04','2021-01-02 22:10:04'),
(2,'2021-02-01 23:10:01','2021-02-02 23:10:04','2021-03-02 22:10:04'),
@@ -48,6 +50,8 @@ PROPERTIES (
(null,'2021-06-01 23:10:01',null,'2021-06-02 22:10:04')
"""
+ sql "set enable_vectorized_engine = true"
+
qt_read_single_column_1 "select datetime1 from ${table1}"
qt_read_single_column_2 "select siteid from ${table1}"
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy
index 2fa2eb2386..a858ef7ea1 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_decimal_nullable.groovy
@@ -38,6 +38,8 @@ PROPERTIES (
)
"""
+ sql "set enable_vectorized_engine = false"
+
sql """insert into ${table1} values(1.1,1.2,1.3,1.4),
(1.1,2.2,2.3,3.4),
(2.1,2.2,2.3,2.4),
@@ -46,6 +48,8 @@ PROPERTIES (
(null,2,null,4)
"""
+ sql "set enable_vectorized_engine = true"
+
// query decimal
test {
sql "select siteid from ${table1} order by siteid"
diff --git a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy
index 924ecc2c68..c729f8a62c 100644
--- a/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy
+++ b/regression-test/suites/data_model/duplicate/storage/test_dup_tab_mixed_type_nullable.groovy
@@ -42,6 +42,8 @@ PROPERTIES (
)
"""
+ sql "set enable_vectorized_engine = false"
+
sql """insert into ${table1} values(1,2,3.1,4.2,5.3,5.4,'a1','a2'),
(2,3,4.1,5.2,6.3,7.4,'b1','b2'),
(3,4,5.1,6.2,7.3,8.4,'c1','c2'),
@@ -50,6 +52,7 @@ PROPERTIES (
(5,6,5.1,8.2,6.3,11.4,'e1','e2'),
(null,7,null,8,null,9,null,'e3')
"""
+ sql "set enable_vectorized_engine = true"
// read int and string
test {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org