You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/10/20 00:45:32 UTC

[doris] branch master updated: [fix](String) fix string type length set to -1 when load string data (#13475)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3a2d5db914 [fix](String) fix string type length set to -1 when load string data (#13475)
3a2d5db914 is described below

commit 3a2d5db914050d690f9a71c855f3e48dac3dd6a5
Author: Zhengguo Yang <ya...@gmail.com>
AuthorDate: Thu Oct 20 08:45:25 2022 +0800

    [fix](String) fix string type length set to -1 when load string data (#13475)
    
    The string type length may be set to -1 when a TypeDescriptor is created from thrift or protobuf; this causes the length-limit check to overflow.
---
 be/src/runtime/types.cpp         | 16 ++++++++++++++--
 be/src/vec/sink/vtablet_sink.cpp |  6 +++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp
index 3e80f5ec18..aa38f35542 100644
--- a/be/src/runtime/types.cpp
+++ b/be/src/runtime/types.cpp
@@ -43,6 +43,12 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx)
             DCHECK(scalar_type.__isset.scale);
             precision = scalar_type.precision;
             scale = scalar_type.scale;
+        } else if (type == TYPE_STRING) {
+            if (scalar_type.__isset.len) {
+                len = scalar_type.len;
+            } else {
+                len = OLAP_STRING_MAX_LENGTH;
+            }
         }
         break;
     }
@@ -111,7 +117,7 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const {
         node.__set_scalar_type(TScalarType());
         TScalarType& scalar_type = node.scalar_type;
         scalar_type.__set_type(doris::to_thrift(type));
-        if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL) {
+        if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL || type == TYPE_STRING) {
             // DCHECK_NE(len, -1);
             scalar_type.__set_len(len);
         } else if (type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 ||
@@ -131,7 +137,7 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const {
     node->set_type(TTypeNodeType::SCALAR);
     auto scalar_type = node->mutable_scalar_type();
     scalar_type->set_type(doris::to_thrift(type));
-    if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL) {
+    if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL || type == TYPE_STRING) {
         scalar_type->set_len(len);
     } else if (type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 ||
                type == TYPE_DECIMAL128 || type == TYPE_DATETIMEV2) {
@@ -167,6 +173,12 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField<PTypeNod
             DCHECK(scalar_type.has_scale());
             precision = scalar_type.precision();
             scale = scalar_type.scale();
+        } else if (type == TYPE_STRING) {
+            if (scalar_type.has_len()) {
+                len = scalar_type.len();
+            } else {
+                len = OLAP_STRING_MAX_LENGTH;
+            }
         }
         break;
     }
diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index bf4bae6dc1..ffeb6525ec 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -614,7 +614,11 @@ Status VOlapTableSink::_validate_column(RuntimeState* state, const TypeDescripto
         const auto column_string =
                 assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
 
-        size_t limit = std::min(config::string_type_length_soft_limit_bytes, type.len);
+        size_t limit = config::string_type_length_soft_limit_bytes;
+        // when type.len is negative, std::min will return overflow value, so we need to check it
+        if (type.len > 0) {
+            limit = std::min(config::string_type_length_soft_limit_bytes, type.len);
+        }
         for (size_t j = 0; j < column->size(); ++j) {
             auto row = rows ? (*rows)[j] : j;
             if (row == last_invalid_row) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org