You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/05/18 08:34:36 UTC
[incubator-doris] 01/09: [Bug][Vectorized] Fix insert bimmap column with nullable column (#9408)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit ca952c91bbb78b09a2a9fb51d723eff84e21a8f1
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Tue May 17 14:42:20 2022 +0800
[Bug][Vectorized] Fix insert bimmap column with nullable column (#9408)
Co-authored-by: lihaopeng <li...@baidu.com>
---
be/src/vec/sink/vtablet_sink.cpp | 209 +++++++++++++++++++--------------------
be/src/vec/sink/vtablet_sink.h | 6 +-
2 files changed, 107 insertions(+), 108 deletions(-)
diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index e9768c9460..ba3da67b99 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -96,6 +96,7 @@ Status VOlapTableSink::send(RuntimeState* state, vectorized::Block* input_block)
// because of "data unqualified"
return Status::EndOfFile("Encountered unqualified data, stop processing");
}
+ _convert_to_dest_desc_block(&block);
}
BlockRow block_row;
@@ -176,132 +177,105 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
block->get_by_position(i).column->convert_to_full_column_if_const();
const auto& column = block->get_by_position(i).column;
- if (desc->type() == TYPE_OBJECT && column->is_nullable()) {
- const auto& null_map =
- vectorized::check_and_get_column<vectorized::ColumnNullable>(*column)
- ->get_null_map_data();
- fmt::format_to(error_msg, "null is not allowed for bitmap column, column_name: {}; ",
- desc->col_name());
+ auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
+ auto& real_column_ptr =
+ column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());
+ switch (desc->type().type) {
+ case TYPE_CHAR:
+ case TYPE_VARCHAR:
+ case TYPE_STRING: {
+ const auto column_string =
+ assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
+
+ size_t limit = std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
for (int j = 0; j < num_rows; ++j) {
if (!filter_bitmap->Get(j)) {
- if (null_map[j]) {
- RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
- }
- }
- }
- } else {
- auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
- auto& real_column_ptr =
- column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());
-
- switch (desc->type().type) {
- case TYPE_CHAR:
- case TYPE_VARCHAR:
- case TYPE_STRING: {
- const auto column_string =
- assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
-
- size_t limit =
- std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
- for (int j = 0; j < num_rows; ++j) {
- if (!filter_bitmap->Get(j)) {
- auto str_val = column_string->get_data_at(j);
- bool invalid = str_val.size > limit;
+ auto str_val = column_string->get_data_at(j);
+ bool invalid = str_val.size > limit;
- error_msg.clear();
- if (str_val.size > desc->type().len) {
- fmt::format_to(error_msg, "{}",
- "the length of input is too long than schema. ");
- fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
- fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
- fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
- fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
- } else if (str_val.size > limit) {
- fmt::format_to(
- error_msg, "{}",
- "the length of input string is too long than vec schema. ");
- fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
- fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
- fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
- fmt::format_to(error_msg, "limit length: {}; ", limit);
- fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
- }
+ error_msg.clear();
+ if (str_val.size > desc->type().len) {
+ fmt::format_to(error_msg, "{}",
+ "the length of input is too long than schema. ");
+ fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
+ fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
+ fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
+ fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
+ } else if (str_val.size > limit) {
+ fmt::format_to(error_msg, "{}",
+ "the length of input string is too long than vec schema. ");
+ fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
+ fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
+ fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
+ fmt::format_to(error_msg, "limit length: {}; ", limit);
+ fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
+ }
- if (invalid) {
- RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
- }
+ if (invalid) {
+ RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
}
- break;
}
- case TYPE_DECIMALV2: {
- auto column_decimal = const_cast<
- vectorized::ColumnDecimal<vectorized::Decimal128>*>(
- assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
- real_column_ptr.get()));
+ break;
+ }
+ case TYPE_DECIMALV2: {
+ auto column_decimal = const_cast<vectorized::ColumnDecimal<vectorized::Decimal128>*>(
+ assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
+ real_column_ptr.get()));
- for (int j = 0; j < num_rows; ++j) {
- if (!filter_bitmap->Get(j)) {
- auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
- column_decimal->get_data()[j]);
- error_msg.clear();
- bool invalid = false;
+ for (int j = 0; j < num_rows; ++j) {
+ if (!filter_bitmap->Get(j)) {
+ auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
+ column_decimal->get_data()[j]);
+ error_msg.clear();
+ bool invalid = false;
- if (dec_val.greater_than_scale(desc->type().scale)) {
- auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
- column_decimal->get_data()[j] =
- binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);
+ if (dec_val.greater_than_scale(desc->type().scale)) {
+ auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
+ column_decimal->get_data()[j] =
+ binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);
- if (code != E_DEC_OK) {
- fmt::format_to(error_msg, "round one decimal failed.value={}; ",
- dec_val.to_string());
- invalid = true;
- }
- }
- if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
- fmt::format_to(error_msg,
- "decimal value is not valid for definition, column={}",
- desc->col_name());
- fmt::format_to(error_msg, ", value={}", dec_val.to_string());
- fmt::format_to(error_msg, ", precision={}, scale={}; ",
- desc->type().precision, desc->type().scale);
+ if (code != E_DEC_OK) {
+ fmt::format_to(error_msg, "round one decimal failed.value={}; ",
+ dec_val.to_string());
invalid = true;
}
+ }
+ if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
+ fmt::format_to(error_msg,
+ "decimal value is not valid for definition, column={}",
+ desc->col_name());
+ fmt::format_to(error_msg, ", value={}", dec_val.to_string());
+ fmt::format_to(error_msg, ", precision={}, scale={}; ",
+ desc->type().precision, desc->type().scale);
+ invalid = true;
+ }
- if (invalid) {
- RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
- }
+ if (invalid) {
+ RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
}
- break;
- }
- default:
- break;
- }
-
- // Dispose the nullable column not match problem here, convert to nullable column
- if (desc->is_nullable() && !column_ptr) {
- block->get_by_position(i).column = vectorized::make_nullable(column);
- block->get_by_position(i).type =
- vectorized::make_nullable(block->get_by_position(i).type);
}
+ break;
+ }
+ default:
+ break;
+ }
- // Dispose the nullable column not match problem here, convert to not nullable column
- if (!desc->is_nullable() && column_ptr) {
- const auto& null_map = column_ptr->get_null_map_data();
- for (int j = 0; j < null_map.size(); ++j) {
- fmt::format_to(error_msg, "null value for not null column, column={}; ",
- desc->col_name());
- if (null_map[j] && !filter_bitmap->Get(j)) {
- RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
- }
+ // Handle columns that must not contain NULL values.
+ // There are only two cases:
+ // 1. the column is nullable but the desc is not nullable
+ // 2. desc->type is BITMAP
+ if ((!desc->is_nullable() || desc->type() == TYPE_OBJECT) && column_ptr) {
+ const auto& null_map = column_ptr->get_null_map_data();
+ for (int j = 0; j < null_map.size(); ++j) {
+ fmt::format_to(error_msg,
+ "null value for not null column/or bitmap column, column={}; ",
+ desc->col_name());
+ if (null_map[j] && !filter_bitmap->Get(j)) {
+ RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
}
- block->get_by_position(i).column = column_ptr->get_nested_column_ptr();
- block->get_by_position(i).type =
- (reinterpret_cast<const vectorized::DataTypeNullable*>(
- block->get_by_position(i).type.get()))
- ->get_nested_type();
}
}
}
@@ -313,5 +287,26 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
return Status::OK();
}
+void VOlapTableSink::_convert_to_dest_desc_block(doris::vectorized::Block* block) { // Makes each block column's nullability match its destination slot descriptor (modifies block in place).
+ for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) { // slot i is paired with the block column at position i
+ SlotDescriptor* desc = _output_tuple_desc->slots()[i];
+ if (desc->is_nullable() != block->get_by_position(i).type->is_nullable()) { // only touch columns whose nullability differs from the slot
+ if (desc->is_nullable()) { // slot is nullable, column is not: wrap both the type and the column as nullable
+ block->get_by_position(i).type =
+ vectorized::make_nullable(block->get_by_position(i).type);
+ block->get_by_position(i).column =
+ vectorized::make_nullable(block->get_by_position(i).column);
+ } else { // slot is not nullable, column is: replace type and column with their nested (non-nullable) parts; NOTE(review): presumably rows holding NULL were already rejected by _validate_data — confirm
+ block->get_by_position(i).type = assert_cast<const vectorized::DataTypeNullable&>(
+ *block->get_by_position(i).type)
+ .get_nested_type();
+ block->get_by_position(i).column = assert_cast<const vectorized::ColumnNullable&>(
+ *block->get_by_position(i).column)
+ .get_nested_column_ptr();
+ }
+ }
+ }
+}
+
} // namespace stream_load
} // namespace doris
diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h
index 5514ff1909..d703334523 100644
--- a/be/src/vec/sink/vtablet_sink.h
+++ b/be/src/vec/sink/vtablet_sink.h
@@ -58,9 +58,13 @@ private:
Status _validate_data(RuntimeState* state, vectorized::Block* block, Bitmap* filter_bitmap,
int* filtered_rows, bool* stop_processing);
+ // Some output columns of the output exprs may differ in nullability from the dest slot desc,
+ // so they need to be converted here.
+ void _convert_to_dest_desc_block(vectorized::Block* block);
+
VOlapTablePartitionParam* _vpartition = nullptr;
std::vector<vectorized::VExprContext*> _output_vexpr_ctxs;
};
} // namespace stream_load
-} // namespace doris
\ No newline at end of file
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org