You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/05/17 06:42:27 UTC

[incubator-doris] branch master updated: [Bug][Vectorized] Fix insert bimmap column with nullable column (#9408)

This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 536d8ca1ed [Bug][Vectorized] Fix insert bimmap column with nullable column (#9408)
536d8ca1ed is described below

commit 536d8ca1ed2c5897859fd6d029555d1bbf8ddad2
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Tue May 17 14:42:20 2022 +0800

    [Bug][Vectorized] Fix insert bimmap column with nullable column (#9408)
    
    Co-authored-by: lihaopeng <li...@baidu.com>
---
 be/src/vec/sink/vtablet_sink.cpp         | 215 +++++++++++++++----------------
 be/src/vec/sink/vtablet_sink.h           |   6 +-
 regression-test/suites/query/load.groovy |  22 ++++
 3 files changed, 132 insertions(+), 111 deletions(-)

diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index a86ef57cf2..16edd09e77 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -393,6 +393,7 @@ Status VOlapTableSink::send(RuntimeState* state, vectorized::Block* input_block)
             // because of "data unqualified"
             return Status::EndOfFile("Encountered unqualified data, stop processing");
         }
+        _convert_to_dest_desc_block(&block);
     }
 
     BlockRow block_row;
@@ -474,132 +475,105 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
                 block->get_by_position(i).column->convert_to_full_column_if_const();
         const auto& column = block->get_by_position(i).column;
 
-        if (desc->type() == TYPE_OBJECT && column->is_nullable()) {
-            const auto& null_map =
-                    vectorized::check_and_get_column<vectorized::ColumnNullable>(*column)
-                            ->get_null_map_data();
-            fmt::format_to(error_msg, "null is not allowed for bitmap column, column_name: {}; ",
-                           desc->col_name());
+        auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
+        auto& real_column_ptr =
+                column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());
 
+        switch (desc->type().type) {
+        case TYPE_CHAR:
+        case TYPE_VARCHAR:
+        case TYPE_STRING: {
+            const auto column_string =
+                    assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
+
+            size_t limit = std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
             for (int j = 0; j < num_rows; ++j) {
                 if (!filter_bitmap->Get(j)) {
-                    if (null_map[j]) {
-                        RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
+                    auto str_val = column_string->get_data_at(j);
+                    bool invalid = str_val.size > limit;
+
+                    error_msg.clear();
+                    if (str_val.size > desc->type().len) {
+                        fmt::format_to(error_msg, "{}",
+                                       "the length of input is too long than schema. ");
+                        fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
+                        fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
+                        fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
+                        fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
+                    } else if (str_val.size > limit) {
+                        fmt::format_to(error_msg, "{}",
+                                       "the length of input string is too long than vec schema. ");
+                        fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
+                        fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
+                        fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
+                        fmt::format_to(error_msg, "limit length: {}; ", limit);
+                        fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
                     }
-                }
-            }
-        } else {
-            auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
-            auto& real_column_ptr =
-                    column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());
-
-            switch (desc->type().type) {
-            case TYPE_CHAR:
-            case TYPE_VARCHAR:
-            case TYPE_STRING: {
-                const auto column_string =
-                        assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
-
-                size_t limit =
-                        std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
-                for (int j = 0; j < num_rows; ++j) {
-                    if (!filter_bitmap->Get(j)) {
-                        auto str_val = column_string->get_data_at(j);
-                        bool invalid = str_val.size > limit;
-
-                        error_msg.clear();
-                        if (str_val.size > desc->type().len) {
-                            fmt::format_to(error_msg, "{}",
-                                           "the length of input is too long than schema. ");
-                            fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
-                            fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
-                            fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
-                            fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
-                        } else if (str_val.size > limit) {
-                            fmt::format_to(
-                                    error_msg, "{}",
-                                    "the length of input string is too long than vec schema. ");
-                            fmt::format_to(error_msg, "column_name: {}; ", desc->col_name());
-                            fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10));
-                            fmt::format_to(error_msg, "schema length: {}; ", desc->type().len);
-                            fmt::format_to(error_msg, "limit length: {}; ", limit);
-                            fmt::format_to(error_msg, "actual length: {}; ", str_val.size);
-                        }
 
-                        if (invalid) {
-                            RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
-                        }
+                    if (invalid) {
+                        RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
                     }
                 }
-                break;
             }
-            case TYPE_DECIMALV2: {
-                auto column_decimal = const_cast<
-                        vectorized::ColumnDecimal<vectorized::Decimal128>*>(
-                        assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
-                                real_column_ptr.get()));
-
-                for (int j = 0; j < num_rows; ++j) {
-                    if (!filter_bitmap->Get(j)) {
-                        auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
-                                column_decimal->get_data()[j]);
-                        error_msg.clear();
-                        bool invalid = false;
-
-                        if (dec_val.greater_than_scale(desc->type().scale)) {
-                            auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
-                            column_decimal->get_data()[j] =
-                                    binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);
-
-                            if (code != E_DEC_OK) {
-                                fmt::format_to(error_msg, "round one decimal failed.value={}; ",
-                                               dec_val.to_string());
-                                invalid = true;
-                            }
-                        }
-                        if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
-                            fmt::format_to(error_msg,
-                                           "decimal value is not valid for definition, column={}",
-                                           desc->col_name());
-                            fmt::format_to(error_msg, ", value={}", dec_val.to_string());
-                            fmt::format_to(error_msg, ", precision={}, scale={}; ",
-                                           desc->type().precision, desc->type().scale);
+            break;
+        }
+        case TYPE_DECIMALV2: {
+            auto column_decimal = const_cast<vectorized::ColumnDecimal<vectorized::Decimal128>*>(
+                    assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>(
+                            real_column_ptr.get()));
+
+            for (int j = 0; j < num_rows; ++j) {
+                if (!filter_bitmap->Get(j)) {
+                    auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
+                            column_decimal->get_data()[j]);
+                    error_msg.clear();
+                    bool invalid = false;
+
+                    if (dec_val.greater_than_scale(desc->type().scale)) {
+                        auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
+                        column_decimal->get_data()[j] =
+                                binary_cast<DecimalV2Value, vectorized::Int128>(dec_val);
+
+                        if (code != E_DEC_OK) {
+                            fmt::format_to(error_msg, "round one decimal failed.value={}; ",
+                                           dec_val.to_string());
                             invalid = true;
                         }
+                    }
+                    if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
+                        fmt::format_to(error_msg,
+                                       "decimal value is not valid for definition, column={}",
+                                       desc->col_name());
+                        fmt::format_to(error_msg, ", value={}", dec_val.to_string());
+                        fmt::format_to(error_msg, ", precision={}, scale={}; ",
+                                       desc->type().precision, desc->type().scale);
+                        invalid = true;
+                    }
 
-                        if (invalid) {
-                            RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
-                        }
+                    if (invalid) {
+                        RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
                     }
                 }
-                break;
-            }
-            default:
-                break;
-            }
-
-            // Dispose the nullable column not match problem here, convert to nullable column
-            if (desc->is_nullable() && !column_ptr) {
-                block->get_by_position(i).column = vectorized::make_nullable(column);
-                block->get_by_position(i).type =
-                        vectorized::make_nullable(block->get_by_position(i).type);
             }
+            break;
+        }
+        default:
+            break;
+        }
 
-            // Dispose the nullable column not match problem here, convert to not nullable column
-            if (!desc->is_nullable() && column_ptr) {
-                const auto& null_map = column_ptr->get_null_map_data();
-                for (int j = 0; j < null_map.size(); ++j) {
-                    fmt::format_to(error_msg, "null value for not null column, column={}; ",
-                                   desc->col_name());
-                    if (null_map[j] && !filter_bitmap->Get(j)) {
-                        RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
-                    }
+        // Handle columns that must not contain NULL values.
+        // Only two cases:
+        // 1. column is nullable but the desc is not nullable
+        // 2. desc->type is BITMAP
+        if ((!desc->is_nullable() || desc->type() == TYPE_OBJECT) && column_ptr) {
+            const auto& null_map = column_ptr->get_null_map_data();
+            for (int j = 0; j < null_map.size(); ++j) {
+                fmt::format_to(error_msg,
+                               "null value for not null column/or bitmap column, column={}; ",
+                               desc->col_name());
+                if (null_map[j] && !filter_bitmap->Get(j)) {
+                    RETURN_IF_ERROR(set_invalid_and_append_error_msg(j));
                 }
-                block->get_by_position(i).column = column_ptr->get_nested_column_ptr();
-                block->get_by_position(i).type =
-                        (reinterpret_cast<const vectorized::DataTypeNullable*>(
-                                 block->get_by_position(i).type.get()))
-                                ->get_nested_type();
             }
         }
     }
@@ -611,5 +585,26 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
     return Status::OK();
 }
 
+void VOlapTableSink::_convert_to_dest_desc_block(doris::vectorized::Block* block) { // rewrites `block` in place so each column's nullability matches its destination slot
+    for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) { // slot i of the output tuple desc is paired with block position i
+        SlotDescriptor* desc = _output_tuple_desc->slots()[i];
+        if (desc->is_nullable() != block->get_by_position(i).type->is_nullable()) { // only columns whose nullability differs from the slot are touched
+            if (desc->is_nullable()) { // slot is nullable, column is not: wrap both the type and the column with make_nullable
+                block->get_by_position(i).type =
+                        vectorized::make_nullable(block->get_by_position(i).type);
+                block->get_by_position(i).column =
+                        vectorized::make_nullable(block->get_by_position(i).column);
+            } else { // slot is not nullable, column is: replace the type and the column with their nested (inner) ones
+                block->get_by_position(i).type = assert_cast<const vectorized::DataTypeNullable&>(
+                                                         *block->get_by_position(i).type)
+                                                         .get_nested_type();
+                block->get_by_position(i).column = assert_cast<const vectorized::ColumnNullable&>( // NOTE(review): presumes the column really is a ColumnNullable whenever its type is nullable - confirm
+                                                           *block->get_by_position(i).column)
+                                                           .get_nested_column_ptr();
+            }
+        }
+    }
+}
+
 } // namespace stream_load
 } // namespace doris
diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h
index 08b3f54434..65441d8069 100644
--- a/be/src/vec/sink/vtablet_sink.h
+++ b/be/src/vec/sink/vtablet_sink.h
@@ -102,9 +102,13 @@ private:
     Status _validate_data(RuntimeState* state, vectorized::Block* block, Bitmap* filter_bitmap,
                           int* filtered_rows, bool* stop_processing);
 
+    // Some output columns of the output exprs may differ in nullability from the dest slot desc,
+    // so the block is converted here to match the dest slot desc.
+    void _convert_to_dest_desc_block(vectorized::Block* block);
+
     VOlapTablePartitionParam* _vpartition = nullptr;
     std::vector<vectorized::VExprContext*> _output_vexpr_ctxs;
 };
 
 } // namespace stream_load
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/regression-test/suites/query/load.groovy b/regression-test/suites/query/load.groovy
index 93bf46be60..75e01c8c41 100644
--- a/regression-test/suites/query/load.groovy
+++ b/regression-test/suites/query/load.groovy
@@ -67,6 +67,28 @@ suite("load") {
     }
     sql "insert into ${dbName}.test select * from ${dbName}.baseall where k1 <= 3"
 
+    // table for compaction
+    sql """
+    CREATE TABLE compaction_tbl
+    (
+      user_id LARGEINT NOT NULL,
+      date DATE NOT NULL,
+      city VARCHAR(20),
+      age SMALLINT,
+      sex TINYINT,
+      last_visit_date DATETIME REPLACE DEFAULT "1970-01-01 00:00:00",
+      last_update_date DATETIME REPLACE_IF_NOT_NULL DEFAULT "1970-01-01 00:00:00",
+      last_visit_date_not_null DATETIME REPLACE NOT NULL DEFAULT "1970-01-01 00:00:00",
+      cost BIGINT SUM DEFAULT "0",
+      max_dwell_time INT MAX DEFAULT "0",
+      min_dwell_time INT MIN DEFAULT "99999",
+      hll_col HLL HLL_UNION NOT NULL,
+      bitmap_col Bitmap BITMAP_UNION NOT NULL
+    ) AGGREGATE KEY(user_id, date, city, age, sex)
+    DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");"""
+
+    sql """insert into compaction_tbl values(123,"1999-10-10",'aaa',123,123,"1970-01-01 00:00:00","1970-01-01 00:00:00","1970-01-01 00:00:00",123,123,123,hll_hash(""),bitmap_from_string(""));"""
+
     def baseall_count = sql "select count(*) from ${dbName}.baseall"
     assertEquals(16, baseall_count[0][0])
     def test_count = sql "select count(*) from ${dbName}.test"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org