You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by li...@apache.org on 2022/10/16 03:48:42 UTC
[doris] branch master updated: [Opt](fun) simd the substring function and use stack buf to speed up (#13338)
This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 144486e220 [Opt](fun) simd the substring function and use stack buf to speed up (#13338)
144486e220 is described below
commit 144486e220d85f587b505757c163fc5f12821c66
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Sun Oct 16 11:48:34 2022 +0800
[Opt](fun) simd the substring function and use stack buf to speed up (#13338)
---
be/src/vec/functions/function_string.h | 41 ++++++++++++++++++----------------
1 file changed, 22 insertions(+), 19 deletions(-)
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index 09672f2018..b4ef258639 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -22,6 +22,7 @@
#include <fmt/ranges.h>
#include <cstdint>
+#include <memory_resource>
#include <string_view>
#include "exprs/math_functions.h"
@@ -50,18 +51,14 @@ namespace doris::vectorized {
inline size_t get_utf8_byte_length(unsigned char byte) {
size_t char_size = 0;
- if (byte >= 0xFC) {
- char_size = 6;
- } else if (byte >= 0xF8) {
- char_size = 5;
+ if (byte < 0xC0) {
+ char_size = 1;
} else if (byte >= 0xF0) {
char_size = 4;
} else if (byte >= 0xE0) {
char_size = 3;
- } else if (byte >= 0xC0) {
- char_size = 2;
} else {
- char_size = 1;
+ char_size = 2;
}
return char_size;
}
@@ -144,6 +141,7 @@ struct SubstringUtil {
assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get());
auto specific_len_column =
assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get());
+
vector(specific_str_column->get_chars(), specific_str_column->get_offsets(),
specific_start_column->get_data(), specific_len_column->get_data(),
null_map->get_data(), res->get_chars(), res->get_offsets());
@@ -160,18 +158,24 @@ private:
int size = offsets.size();
res_offsets.resize(size);
res_chars.reserve(chars.size());
- std::vector<size_t> index;
+ std::array<std::byte, 128 * 1024> buf;
+ std::pmr::monotonic_buffer_resource pool {buf.data(), buf.size()};
+ std::pmr::vector<size_t> index {&pool};
+
+ std::pmr::vector<std::pair<const unsigned char*, int>> strs(&pool);
+ strs.resize(size);
+ auto* __restrict data_ptr = chars.data();
+ auto* __restrict offset_ptr = offsets.data();
for (int i = 0; i < size; ++i) {
- auto* raw_str = reinterpret_cast<const unsigned char*>(&chars[offsets[i - 1]]);
- int str_size = offsets[i] - offsets[i - 1];
+ strs[i].first = data_ptr + offset_ptr[i - 1];
+ strs[i].second = offset_ptr[i] - offset_ptr[i - 1];
+ }
+
+ for (int i = 0; i < size; ++i) {
+ auto [raw_str, str_size] = strs[i];
// return empty string if start > src.length
- if (start[i] > str_size) {
- StringOP::push_empty_string(i, res_chars, res_offsets);
- continue;
- }
- // return "" if len < 0 or str == 0 or start == 0
- if (len[i] <= 0 || str_size == 0 || start[i] == 0) {
+ if (start[i] > str_size || str_size == 0 || start[i] == 0 || len[i] <= 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
@@ -306,9 +310,8 @@ public:
size_t result, size_t input_rows_count) override {
auto int_type = std::make_shared<DataTypeInt32>();
size_t num_columns_without_result = block.columns();
- block.insert({int_type->create_column_const(input_rows_count, to_field(1))
- ->convert_to_full_column_if_const(),
- int_type, "const 1"});
+ block.insert({int_type->create_column_const(input_rows_count, to_field(1)), int_type,
+ "const 1"});
ColumnNumbers temp_arguments(3);
temp_arguments[0] = arguments[0];
temp_arguments[1] = num_columns_without_result;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org