You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/05/17 06:42:27 UTC
[incubator-doris] branch master updated: [Bug][Vectorized] Fix insert bitmap column with nullable column (#9408)
This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 536d8ca1ed [Bug][Vectorized] Fix insert bitmap column with nullable column (#9408)
536d8ca1ed is described below
commit 536d8ca1ed2c5897859fd6d029555d1bbf8ddad2
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Tue May 17 14:42:20 2022 +0800
[Bug][Vectorized] Fix insert bitmap column with nullable column (#9408)
Co-authored-by: lihaopeng <li...@baidu.com>
---
be/src/vec/sink/vtablet_sink.cpp | 215 +++++++++++++++----------------
be/src/vec/sink/vtablet_sink.h | 6 +-
regression-test/suites/query/load.groovy | 22 ++++
3 files changed, 132 insertions(+), 111 deletions(-)
diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index a86ef57cf2..16edd09e77 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -393,6 +393,7 @@ Status VOlapTableSink::send(RuntimeState* state, vectorized::Block* input_block)
// because of "data unqualified"
return Status::EndOfFile("Encountered unqualified data, stop processing");
}
+ _convert_to_dest_desc_block(&block);
}
BlockRow block_row;
@@ -474,132 +475,105 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
block->get_by_position(i).column->convert_to_full_column_if_const();
const auto& column = block->get_by_position(i).column;
- if (desc->type() == TYPE_OBJECT && column->is_nullable()) {
- const auto& null_map =
- vectorized::check_and_get_column<vectorized::ColumnNullable>(*column)
- ->get_null_map_data();
- fmt::format_to(error_msg, "null is not allowed for bitmap column, column_name: {}; ",
- desc->col_name());
+ auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
+ auto& real_column_ptr =
+ column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());
+ switch (desc->type().type) {
+ case TYPE_CHAR:
+ case TYPE_VARCHAR:
+ case TYPE_STRING: {
+ const auto column_string =
+ assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
+
+ size_t limit = std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
for (int j = 0; j < num_rows; ++j) {
if (!filter_bitmap->Get(j)) {
- if (null_map[j]) {
- RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
+ auto str_val = column_string->get_data_at(j);
+ bool invalid = str_val.size > limit;
+
+ error_msg.clear();
+ if (str_val.size > desc->type().len) {
+ fmt::format_to(error_msg, "{}",
+ "the length of input is too long than schema. ");
+ fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
+ fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
+ fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
+ fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
+ } else if (str_val.size > limit) {
+ fmt::format_to(error_msg, "{}",
+ "the length of input string is too long than vec schema. ");
+ fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
+ fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
+ fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
+ fmt::format_to(error_msg, "limit length: {}; ", limit);
+ fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
}
- }
- }
- } else {
- auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
- auto& real_column_ptr =
- column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());
-
- switch (desc->type().type) {
- case TYPE_CHAR:
- case TYPE_VARCHAR:
- case TYPE_STRING: {
- const auto column_string =
- assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
-
- size_t limit =
- std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
- for (int j = 0; j < num_rows; ++j) {
- if (!filter_bitmap->Get(j)) {
- auto str_val = column_string->get_data_at(j);
- bool invalid = str_val.size > limit;
-
- error_msg.clear();
- if (str_val.size > desc->type().len) {
- fmt::format_to(error_msg, "{}",
- "the length of input is too long than schema. ");
- fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
- fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
- fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
- fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
- } else if (str_val.size > limit) {
- fmt::format_to(
- error_msg, "{}",
- "the length of input string is too long than vec schema. ");
- fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
- fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
- fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
- fmt::format_to(error_msg, "limit length: {}; ", limit);
- fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
- }
- if (invalid) {
- RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
- }
+ if (invalid) {
+ RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
}
- break;
}
- case TYPE_DECIMALV2: {
- auto column_decimal = const_cast<
- vectorized::ColumnDecimal<vectorized::Decimal128>*>(
- assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
- real_column_ptr.get()));
-
- for (int j = 0; j < num_rows; ++j) {
- if (!filter_bitmap->Get(j)) {
- auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
- column_decimal->get_data()[j]);
- error_msg.clear();
- bool invalid = false;
-
- if (dec_val.greater_than_scale(desc->type().scale)) {
- auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
- column_decimal->get_data()[j] =
- binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);
-
- if (code != E_DEC_OK) {
- fmt::format_to(error_msg, "round one decimal failed.value={}; ",
- dec_val.to_string());
- invalid = true;
- }
- }
- if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
- fmt::format_to(error_msg,
- "decimal value is not valid for definition, column={}",
- desc->col_name());
- fmt::format_to(error_msg, ", value={}", dec_val.to_string());
- fmt::format_to(error_msg, ", precision={}, scale={}; ",
- desc->type().precision, desc->type().scale);
+ break;
+ }
+ case TYPE_DECIMALV2: {
+ auto column_decimal = const_cast<vectorized::ColumnDecimal<vectorized::Decimal128>*>(
+ assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
+ real_column_ptr.get()));
+
+ for (int j = 0; j < num_rows; ++j) {
+ if (!filter_bitmap->Get(j)) {
+ auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
+ column_decimal->get_data()[j]);
+ error_msg.clear();
+ bool invalid = false;
+
+ if (dec_val.greater_than_scale(desc->type().scale)) {
+ auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
+ column_decimal->get_data()[j] =
+ binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);
+
+ if (code != E_DEC_OK) {
+ fmt::format_to(error_msg, "round one decimal failed.value={}; ",
+ dec_val.to_string());
invalid = true;
}
+ }
+ if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
+ fmt::format_to(error_msg,
+ "decimal value is not valid for definition, column={}",
+ desc->col_name());
+ fmt::format_to(error_msg, ", value={}", dec_val.to_string());
+ fmt::format_to(error_msg, ", precision={}, scale={}; ",
+ desc->type().precision, desc->type().scale);
+ invalid = true;
+ }
- if (invalid) {
- RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
- }
+ if (invalid) {
+ RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
}
- break;
- }
- default:
- break;
- }
-
- // Dispose the nullable column not match problem here, convert to nullable column
- if (desc->is_nullable() && !column_ptr) {
- block->get_by_position(i).column = vectorized::make_nullable(column);
- block->get_by_position(i).type =
- vectorized::make_nullable(block->get_by_position(i).type);
}
+ break;
+ }
+ default:
+ break;
+ }
- // Dispose the nullable column not match problem here, convert to not nullable column
- if (!desc->is_nullable() && column_ptr) {
- const auto& null_map = column_ptr->get_null_map_data();
- for (int j = 0; j < null_map.size(); ++j) {
- fmt::format_to(error_msg, "null value for not null column, column={}; ",
- desc->col_name());
- if (null_map[j] && !filter_bitmap->Get(j)) {
- RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
- }
+ // Handle columns that must not contain NULL values.
+ // Only two cases:
+ // 1. the column is nullable but the desc is not nullable
+ // 2. desc->type is BITMAP
+ if ((!desc->is_nullable() || desc->type() == TYPE_OBJECT) && column_ptr) {
+ const auto& null_map = column_ptr->get_null_map_data();
+ for (int j = 0; j < null_map.size(); ++j) {
+ fmt::format_to(error_msg,
+ "null value for not null column/or bitmap column, column={}; ",
+ desc->col_name());
+ if (null_map[j] && !filter_bitmap->Get(j)) {
+ RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
- block->get_by_position(i).column = column_ptr->get_nested_column_ptr();
- block->get_by_position(i).type =
- (reinterpret_cast<const vectorized::DataTypeNullable*>(
- block->get_by_position(i).type.get()))
- ->get_nested_type();
}
}
}
@@ -611,5 +585,26 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
return Status::OK();
}
+void VOlapTableSink::_convert_to_dest_desc_block(doris::vectorized::Block* block) {
+ for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) {
+ SlotDescriptor* desc = _output_tuple_desc->slots()[i];
+ if (desc->is_nullable() != block->get_by_position(i).type->is_nullable()) {
+ if (desc->is_nullable()) {
+ block->get_by_position(i).type =
+ vectorized::make_nullable(block->get_by_position(i).type);
+ block->get_by_position(i).column =
+ vectorized::make_nullable(block->get_by_position(i).column);
+ } else {
+ block->get_by_position(i).type = assert_cast<const vectorized::DataTypeNullable&>(
+ *block->get_by_position(i).type)
+ .get_nested_type();
+ block->get_by_position(i).column = assert_cast<const vectorized::ColumnNullable&>(
+ *block->get_by_position(i).column)
+ .get_nested_column_ptr();
+ }
+ }
+ }
+}
+
} // namespace stream_load
} // namespace doris
diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h
index 08b3f54434..65441d8069 100644
--- a/be/src/vec/sink/vtablet_sink.h
+++ b/be/src/vec/sink/vtablet_sink.h
@@ -102,9 +102,13 @@ private:
Status _validate_data(RuntimeState* state, vectorized::Block* block, Bitmap* filter_bitmap,
int* filtered_rows, bool* stop_processing);
+ // Some output columns of the output exprs may differ in nullability from the dest slot desc,
+ // so the conversion to the dest slot desc's nullability is done here.
+ void _convert_to_dest_desc_block(vectorized::Block* block);
+
VOlapTablePartitionParam* _vpartition = nullptr;
std::vector<vectorized::VExprContext*> _output_vexpr_ctxs;
};
} // namespace stream_load
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/regression-test/suites/query/load.groovy b/regression-test/suites/query/load.groovy
index 93bf46be60..75e01c8c41 100644
--- a/regression-test/suites/query/load.groovy
+++ b/regression-test/suites/query/load.groovy
@@ -67,6 +67,28 @@ suite("load") {
}
sql "insert into ${dbName}.test select * from ${dbName}.baseall where k1 <= 3"
+ // table for compaction
+ sql """
+ CREATE TABLE compaction_tbl
+ (
+ user_id LARGEINT NOT NULL,
+ date DATE NOT NULL,
+ city VARCHAR(20),
+ age SMALLINT,
+ sex TINYINT,
+ last_visit_date DATETIME REPLACE DEFAULT "1970-01-01 00:00:00",
+ last_update_date DATETIME REPLACE_IF_NOT_NULL DEFAULT "1970-01-01 00:00:00",
+ last_visit_date_not_null DATETIME REPLACE NOT NULL DEFAULT "1970-01-01 00:00:00",
+ cost BIGINT SUM DEFAULT "0",
+ max_dwell_time INT MAX DEFAULT "0",
+ min_dwell_time INT MIN DEFAULT "99999",
+ hll_col HLL HLL_UNION NOT NULL,
+ bitmap_col Bitmap BITMAP_UNION NOT NULL
+ ) AGGREGATE KEY(user_id, date, city, age, sex)
+ DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");"""
+
+ sql """insert into compaction_tbl values(123,"1999-10-10",'aaa',123,123,"1970-01-01 00:00:00","1970-01-01 00:00:00","1970-01-01 00:00:00",123,123,123,hll_hash(""),bitmap_from_string(""));"""
+
def baseall_count = sql "select count(*) from ${dbName}.baseall"
assertEquals(16, baseall_count[0][0])
def test_count = sql "select count(*) from ${dbName}.test"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org