You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/04/08 09:05:41 UTC
[doris] branch master updated: [optimize](string) optimize concat function by SIMD memcpy (#18458)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fb50626075 [optimize](string) optimize concat function by SIMD memcpy (#18458)
fb50626075 is described below
commit fb50626075227e4b8b8a6da373aa7c9b796bd618
Author: ZhangYu0123 <67...@users.noreply.github.com>
AuthorDate: Sat Apr 8 17:05:34 2023 +0800
[optimize](string) optimize concat function by SIMD memcpy (#18458)
Speed up the concat function by 29% by using memcpy_small_allow_read_write_overflow15.
String functions optimized: concat, convert_to, mask, initcap, lower, upper.
The concat function is 29% faster:
---
be/src/vec/functions/function_string.cpp | 10 ++++++----
be/src/vec/functions/function_string.h | 21 +++++++++++++--------
2 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index 8952ff73da..34c65219f0 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -255,8 +255,9 @@ struct TransferImpl {
}
res_offsets.resize(offset_size);
- memcpy(res_offsets.data(), offsets.data(),
- offset_size * sizeof(ColumnString::Offsets::value_type));
+ memcpy_small_allow_read_write_overflow15(
+ res_offsets.data(), offsets.data(),
+ offset_size * sizeof(ColumnString::Offsets::value_type));
size_t data_length = data.size();
res_data.resize(data_length);
@@ -279,8 +280,9 @@ struct InitcapImpl {
ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
size_t offset_size = offsets.size();
res_offsets.resize(offsets.size());
- memcpy(res_offsets.data(), offsets.data(),
- offset_size * sizeof(ColumnString::Offsets::value_type));
+ memcpy_small_allow_read_write_overflow15(
+ res_offsets.data(), offsets.data(),
+ offset_size * sizeof(ColumnString::Offsets::value_type));
size_t data_length = data.size();
res_data.resize(data_length);
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index f257ad3537..83b6782192 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -368,8 +368,9 @@ private:
const char lower, const char number) {
result.get_chars().resize(source.get_chars().size());
result.get_offsets().resize(source.get_offsets().size());
- memcpy(result.get_offsets().data(), source.get_offsets().data(),
- source.get_offsets().size() * sizeof(ColumnString::Offset));
+ memcpy_small_allow_read_write_overflow15(
+ result.get_offsets().data(), source.get_offsets().data(),
+ source.get_offsets().size() * sizeof(ColumnString::Offset));
const unsigned char* src = source.get_chars().data();
const size_t size = source.get_chars().size();
@@ -452,8 +453,9 @@ private:
auto* offsets = src.get_offsets().data();
result.get_chars().resize(src.get_chars().size());
result.get_offsets().resize(src.get_offsets().size());
- memcpy(result.get_offsets().data(), src.get_offsets().data(),
- src.get_offsets().size() * sizeof(ColumnString::Offset));
+ memcpy_small_allow_read_write_overflow15(
+ result.get_offsets().data(), src.get_offsets().data(),
+ src.get_offsets().size() * sizeof(ColumnString::Offset));
auto* res = result.get_chars().data();
for (ssize_t i = 0; i != num_rows; ++i) {
@@ -709,9 +711,12 @@ public:
auto& current_chars = *chars_list[j];
int size = current_offsets[i] - current_offsets[i - 1];
- memcpy(&res_data[res_offset[i - 1]] + current_length,
- &current_chars[current_offsets[i - 1]], size);
- current_length += size;
+ if (size > 0) {
+ memcpy_small_allow_read_write_overflow15(
+ &res_data[res_offset[i - 1]] + current_length,
+ &current_chars[current_offsets[i - 1]], size);
+ current_length += size;
+ }
}
res_offset[i] = res_offset[i - 1] + current_length;
}
@@ -2530,7 +2535,7 @@ public:
void _utf8_to_pinyin(const char* in, size_t in_len, char* out, size_t* out_len) {
auto do_memcpy = [](char*& dest, const char*& from, size_t size) {
- memcpy(dest, from, size);
+ memcpy_small_allow_read_write_overflow15(dest, from, size);
dest += size;
from += size;
};
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org