You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/10/27 01:07:05 UTC

[doris] branch branch-1.1-lts updated: [fix](String) fix string type length set to -1 when load string data (#13475) (#13696)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
     new 87ff01907c [fix](String) fix string type length set to -1 when load string data (#13475) (#13696)
87ff01907c is described below

commit 87ff01907c1f34a7c408115b8d19233d69b98f75
Author: Zhengguo Yang <ya...@gmail.com>
AuthorDate: Thu Oct 27 09:07:00 2022 +0800

    [fix](String) fix string type length set to -1 when load string data (#13475) (#13696)
    
    The string type length may be set to -1 when a TypeDescriptor is created from thrift or protobuf; this causes the length-limit check to overflow.
    cherry-pick #13475
---
 be/src/exprs/cast_functions.cpp  | 10 +++++-----
 be/src/runtime/types.cpp         | 16 ++++++++++++++--
 be/src/vec/sink/vtablet_sink.cpp |  7 +++++--
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/be/src/exprs/cast_functions.cpp b/be/src/exprs/cast_functions.cpp
index baadd09ce2..245d56d7c8 100644
--- a/be/src/exprs/cast_functions.cpp
+++ b/be/src/exprs/cast_functions.cpp
@@ -179,15 +179,15 @@ int float_to_string<double>(double value, char* buf) {
             return sv;                                                                         \
         }                                                                                      \
         const FunctionContext::TypeDesc& returnType = ctx->get_return_type();                  \
-        if (returnType.len > 0) {                                                              \
+        if (returnType.len == -1 || returnType.type == FunctionContext::TYPE_STRING) {         \
+            char buf[MAX_DOUBLE_STR_LENGTH + 2];                                               \
+            sv.len = float_to_string(val.val, buf);                                            \
+            memcpy(sv.ptr, buf, sv.len);                                                       \
+        } else if (returnType.len > 0) {                                                       \
             sv.len = snprintf(reinterpret_cast<char*>(sv.ptr), sv.len, format, val.val);       \
             DCHECK_GT(sv.len, 0);                                                              \
             DCHECK_LE(sv.len, MAX_FLOAT_CHARS);                                                \
             AnyValUtil::TruncateIfNecessary(returnType, &sv);                                  \
-        } else if (returnType.len == -1) {                                                     \
-            char buf[MAX_DOUBLE_STR_LENGTH + 2];                                               \
-            sv.len = float_to_string(val.val, buf);                                            \
-            memcpy(sv.ptr, buf, sv.len);                                                       \
         } else {                                                                               \
             DCHECK(false);                                                                     \
         }                                                                                      \
diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp
index 63ff515121..6804715fee 100644
--- a/be/src/runtime/types.cpp
+++ b/be/src/runtime/types.cpp
@@ -40,6 +40,12 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx)
             DCHECK(scalar_type.__isset.scale);
             precision = scalar_type.precision;
             scale = scalar_type.scale;
+        } else if (type == TYPE_STRING) {
+            if (scalar_type.__isset.len) {
+                len = scalar_type.len;
+            } else {
+                len = OLAP_STRING_MAX_LENGTH;
+            }
         }
         break;
     }
@@ -105,7 +111,7 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const {
         node.__set_scalar_type(TScalarType());
         TScalarType& scalar_type = node.scalar_type;
         scalar_type.__set_type(doris::to_thrift(type));
-        if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL) {
+        if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL || type == TYPE_STRING) {
             // DCHECK_NE(len, -1);
             scalar_type.__set_len(len);
         } else if (type == TYPE_DECIMALV2) {
@@ -124,7 +130,7 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const {
     node->set_type(TTypeNodeType::SCALAR);
     auto scalar_type = node->mutable_scalar_type();
     scalar_type->set_type(doris::to_thrift(type));
-    if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL) {
+    if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL || type == TYPE_STRING) {
         scalar_type->set_len(len);
     } else if (type == TYPE_DECIMALV2) {
         DCHECK_NE(precision, -1);
@@ -158,6 +164,12 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField<PTypeNod
             DCHECK(scalar_type.has_scale());
             precision = scalar_type.precision();
             scale = scalar_type.scale();
+        } else if (type == TYPE_STRING) {
+            if (scalar_type.has_len()) {
+                len = scalar_type.len();
+            } else {
+                len = OLAP_STRING_MAX_LENGTH;
+            }
         }
         break;
     }
diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index 2288c2dc32..8989dd544e 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -204,8 +204,11 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl
         case TYPE_STRING: {
             const auto column_string =
                     assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
-
-            size_t limit = std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
+            size_t limit = config::string_type_length_soft_limit_bytes;
+            // desc->type().len may be negative, which would overflow the size_t limit, so only apply it when positive
+            if (desc->type().len > 0) {
+                limit = std::min(config::string_type_length_soft_limit_bytes, desc->type().len);
+            }
             for (int j = 0; j < num_rows; ++j) {
                 if (!filter_bitmap->Get(j)) {
                     auto str_val = column_string->get_data_at(j);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org