You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2022/12/10 10:45:52 UTC

[doris] branch master updated: [fix](csv-reader) fix be crash when reading invalid value (#14951)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0b945e4ee3 [fix](csv-reader) fix be crash when reading invalid value (#14951)
0b945e4ee3 is described below

commit 0b945e4ee3181a22051864436c1f928469b997d8
Author: Mingyu Chen <mo...@163.com>
AuthorDate: Sat Dec 10 18:45:47 2022 +0800

    [fix](csv-reader) fix be crash when reading invalid value (#14951)
---
 be/src/exec/text_converter.hpp | 10 ++++++++--
 be/src/vec/core/block.cpp      | 12 ++++++++++++
 be/src/vec/core/block.h        |  2 ++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp
index 45a7a4e570..55aee22d0a 100644
--- a/be/src/exec/text_converter.hpp
+++ b/be/src/exec/text_converter.hpp
@@ -205,6 +205,7 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
         }
     }
 
+    bool insert_after_parse_failure = true;
     StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
     // Parse the raw-text data. Translate the text string to internal format.
     switch (slot_desc->type().type) {
@@ -271,6 +272,7 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
         vectorized::VecDateTimeValue ts_slot;
         if (!ts_slot.from_date_str(data, len)) {
             parse_result = StringParser::PARSE_FAILURE;
+            insert_after_parse_failure = false;
             break;
         }
         ts_slot.cast_to_date();
@@ -283,6 +285,7 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
         vectorized::VecDateTimeValue ts_slot;
         if (!ts_slot.from_date_str(data, len)) {
             parse_result = StringParser::PARSE_FAILURE;
+            insert_after_parse_failure = false;
             break;
         }
         ts_slot.to_datetime();
@@ -295,6 +298,7 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
         DecimalV2Value decimal_slot;
         if (decimal_slot.parse_from_str(data, len)) {
             parse_result = StringParser::PARSE_FAILURE;
+            insert_after_parse_failure = false;
             break;
         }
         reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(
@@ -308,12 +312,14 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
     }
 
     if (UNLIKELY(parse_result == StringParser::PARSE_FAILURE)) {
-        if (true == slot_desc->is_nullable()) {
+        if (slot_desc->is_nullable()) {
             auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(nullable_col_ptr);
             size_t size = nullable_column->get_null_map_data().size();
             doris::vectorized::NullMap& null_map_data = nullable_column->get_null_map_data();
             null_map_data[size - 1] = 1;
-            nullable_column->get_nested_column().insert_default();
+            if (!insert_after_parse_failure) {
+                nullable_column->get_nested_column().insert_default();
+            }
         }
         return false;
     }
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 827eaab9ac..fe81a98968 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -320,6 +320,18 @@ size_t Block::rows() const {
     return 0;
 }
 
+std::string Block::each_col_size() { // Builds a string of per-column sizes.
+    std::stringstream ss;
+    for (const auto& elem : data) {
+        if (elem.column) {
+            ss << elem.column->size() << " | "; // every entry is followed by " | "
+        } else {
+            ss << "-1 | "; // -1 stands in for an entry whose column pointer is null
+        }
+    }
+    return ss.str();
+}
+
 void Block::set_num_rows(size_t length) {
     if (rows() > length) {
         for (auto& elem : data) {
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 93d9cabaab..6b7cc9d5a1 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -180,6 +180,8 @@ public:
     /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
     size_t rows() const;
 
+    std::string each_col_size(); // Size of each column, each followed by " | "; "-1" for a null column.
+
     // Cut the rows in block, use in LIMIT operation
     void set_num_rows(size_t length);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org