You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/04/08 09:05:41 UTC

[doris] branch master updated: [optimize](string) optimize concat function by SIMD memcpy (#18458)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fb50626075 [optimize](string) optimize concat function by SIMD memcpy (#18458)
fb50626075 is described below

commit fb50626075227e4b8b8a6da373aa7c9b796bd618
Author: ZhangYu0123 <67...@users.noreply.github.com>
AuthorDate: Sat Apr 8 17:05:34 2023 +0800

    [optimize](string) optimize concat function by SIMD memcpy (#18458)
    
    Speed up the concat function by 29% by replacing memcpy with memcpy_small_allow_read_write_overflow15.
    String functions optimized by this change: concat, convert_to, mask, initcap, lower, upper.
    
    The concat function is 29% faster:
---
 be/src/vec/functions/function_string.cpp | 10 ++++++----
 be/src/vec/functions/function_string.h   | 21 +++++++++++++--------
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index 8952ff73da..34c65219f0 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -255,8 +255,9 @@ struct TransferImpl {
         }
 
         res_offsets.resize(offset_size);
-        memcpy(res_offsets.data(), offsets.data(),
-               offset_size * sizeof(ColumnString::Offsets::value_type));
+        memcpy_small_allow_read_write_overflow15(
+                res_offsets.data(), offsets.data(),
+                offset_size * sizeof(ColumnString::Offsets::value_type));
 
         size_t data_length = data.size();
         res_data.resize(data_length);
@@ -279,8 +280,9 @@ struct InitcapImpl {
                          ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
         size_t offset_size = offsets.size();
         res_offsets.resize(offsets.size());
-        memcpy(res_offsets.data(), offsets.data(),
-               offset_size * sizeof(ColumnString::Offsets::value_type));
+        memcpy_small_allow_read_write_overflow15(
+                res_offsets.data(), offsets.data(),
+                offset_size * sizeof(ColumnString::Offsets::value_type));
 
         size_t data_length = data.size();
         res_data.resize(data_length);
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index f257ad3537..83b6782192 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -368,8 +368,9 @@ private:
                             const char lower, const char number) {
         result.get_chars().resize(source.get_chars().size());
         result.get_offsets().resize(source.get_offsets().size());
-        memcpy(result.get_offsets().data(), source.get_offsets().data(),
-               source.get_offsets().size() * sizeof(ColumnString::Offset));
+        memcpy_small_allow_read_write_overflow15(
+                result.get_offsets().data(), source.get_offsets().data(),
+                source.get_offsets().size() * sizeof(ColumnString::Offset));
 
         const unsigned char* src = source.get_chars().data();
         const size_t size = source.get_chars().size();
@@ -452,8 +453,9 @@ private:
         auto* offsets = src.get_offsets().data();
         result.get_chars().resize(src.get_chars().size());
         result.get_offsets().resize(src.get_offsets().size());
-        memcpy(result.get_offsets().data(), src.get_offsets().data(),
-               src.get_offsets().size() * sizeof(ColumnString::Offset));
+        memcpy_small_allow_read_write_overflow15(
+                result.get_offsets().data(), src.get_offsets().data(),
+                src.get_offsets().size() * sizeof(ColumnString::Offset));
         auto* res = result.get_chars().data();
 
         for (ssize_t i = 0; i != num_rows; ++i) {
@@ -709,9 +711,12 @@ public:
                 auto& current_chars = *chars_list[j];
 
                 int size = current_offsets[i] - current_offsets[i - 1];
-                memcpy(&res_data[res_offset[i - 1]] + current_length,
-                       &current_chars[current_offsets[i - 1]], size);
-                current_length += size;
+                if (size > 0) {
+                    memcpy_small_allow_read_write_overflow15(
+                            &res_data[res_offset[i - 1]] + current_length,
+                            &current_chars[current_offsets[i - 1]], size);
+                    current_length += size;
+                }
             }
             res_offset[i] = res_offset[i - 1] + current_length;
         }
@@ -2530,7 +2535,7 @@ public:
 
     void _utf8_to_pinyin(const char* in, size_t in_len, char* out, size_t* out_len) {
         auto do_memcpy = [](char*& dest, const char*& from, size_t size) {
-            memcpy(dest, from, size);
+            memcpy_small_allow_read_write_overflow15(dest, from, size);
             dest += size;
             from += size;
         };


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org