You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ga...@apache.org on 2022/10/16 08:18:30 UTC
[doris] branch master updated: [Improvement](like) Change `like` function to batch call (#13314)
This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1d5ba9cbcc [Improvement](like) Change `like` function to batch call (#13314)
1d5ba9cbcc is described below
commit 1d5ba9cbcce3d23a7a8f9b08127c2f32899afa80
Author: Gabriel <ga...@gmail.com>
AuthorDate: Sun Oct 16 16:18:22 2022 +0800
[Improvement](like) Change `like` function to batch call (#13314)
---
be/src/olap/like_column_predicate.cpp | 125 +++-------
be/src/olap/like_column_predicate.h | 72 ++----
be/src/runtime/string_search.hpp | 14 ++
be/src/vec/common/string_ref.h | 89 ++++---
be/src/vec/functions/like.cpp | 353 +++++++++++++++++++++-------
be/src/vec/functions/like.h | 132 +++++++----
be/test/vec/function/function_like_test.cpp | 16 --
7 files changed, 486 insertions(+), 315 deletions(-)
diff --git a/be/src/olap/like_column_predicate.cpp b/be/src/olap/like_column_predicate.cpp
index a3546d3546..1a50187cc9 100644
--- a/be/src/olap/like_column_predicate.cpp
+++ b/be/src/olap/like_column_predicate.cpp
@@ -86,8 +86,9 @@ uint16_t LikeColumnPredicate<is_vectorized>::evaluate(const vectorized::IColumn&
sel[new_size] = idx;
StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
unsigned char flag = 0;
- (_state->function)(const_cast<vectorized::LikeSearchState*>(&_like_state),
- cell_value, pattern, &flag);
+ (_state->scalar_function)(
+ const_cast<vectorized::LikeSearchState*>(&_like_state),
+ StringRef(cell_value.ptr, cell_value.len), pattern, &flag);
new_size += _opposite ^ flag;
}
} else {
@@ -101,25 +102,24 @@ uint16_t LikeColumnPredicate<is_vectorized>::evaluate(const vectorized::IColumn&
StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
unsigned char flag = 0;
- (_state->function)(const_cast<vectorized::LikeSearchState*>(&_like_state),
- cell_value, pattern, &flag);
+ (_state->scalar_function)(
+ const_cast<vectorized::LikeSearchState*>(&_like_state),
+ StringRef(cell_value.ptr, cell_value.len), pattern, &flag);
new_size += _opposite ^ flag;
}
}
} else {
- auto* data_array = vectorized::check_and_get_column<
- vectorized::PredicateColumnType<TYPE_STRING>>(nested_col)
- ->get_data()
- .data();
+ auto* str_col = vectorized::check_and_get_column<
+ vectorized::PredicateColumnType<TYPE_STRING>>(nested_col);
if (!nullable_col->has_null()) {
+ vectorized::ColumnUInt8::Container res(size, 0);
+ (_state->predicate_like_function)(
+ const_cast<vectorized::LikeSearchState*>(&_like_state), *str_col,
+ pattern, res, sel, size);
for (uint16_t i = 0; i != size; i++) {
uint16_t idx = sel[i];
sel[new_size] = idx;
-
- unsigned char flag = 0;
- (_state->function)(const_cast<vectorized::LikeSearchState*>(&_like_state),
- data_array[idx], pattern, &flag);
- new_size += _opposite ^ flag;
+ new_size += _opposite ^ res[i];
}
} else {
for (uint16_t i = 0; i != size; i++) {
@@ -130,88 +130,41 @@ uint16_t LikeColumnPredicate<is_vectorized>::evaluate(const vectorized::IColumn&
continue;
}
+ StringValue cell_value = str_col->get_data()[idx];
unsigned char flag = 0;
- (_state->function)(const_cast<vectorized::LikeSearchState*>(&_like_state),
- data_array[idx], pattern, &flag);
+ (_state->scalar_function)(
+ const_cast<vectorized::LikeSearchState*>(&_like_state),
+ StringRef(cell_value.ptr, cell_value.len), pattern, &flag);
new_size += _opposite ^ flag;
}
}
}
} else {
if (column.is_column_dictionary()) {
- if (_state->function_vec_dict) {
- if (LIKELY(_like_state.search_string_sv.len > 0)) {
- auto* nested_col_ptr = vectorized::check_and_get_column<
- vectorized::ColumnDictionary<vectorized::Int32>>(column);
- auto& data_array = nested_col_ptr->get_data();
- StringValue values[size];
- unsigned char flags[size];
- for (uint16_t i = 0; i != size; i++) {
- values[i] = nested_col_ptr->get_shrink_value(data_array[sel[i]]);
- }
- (_state->function_vec_dict)(
- const_cast<vectorized::LikeSearchState*>(&_like_state), pattern,
- values, size, flags);
-
- for (uint16_t i = 0; i != size; i++) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- new_size += _opposite ^ flags[i];
- }
- } else {
- for (uint16_t i = 0; i != size; i++) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- new_size += _opposite ^ true;
- }
- }
- } else {
- auto* nested_col_ptr = vectorized::check_and_get_column<
- vectorized::ColumnDictionary<vectorized::Int32>>(column);
- auto& data_array = nested_col_ptr->get_data();
- for (uint16_t i = 0; i != size; i++) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
- unsigned char flag = 0;
- (_state->function)(const_cast<vectorized::LikeSearchState*>(&_like_state),
- cell_value, pattern, &flag);
- new_size += _opposite ^ flag;
- }
+ auto* nested_col_ptr = vectorized::check_and_get_column<
+ vectorized::ColumnDictionary<vectorized::Int32>>(column);
+ auto& data_array = nested_col_ptr->get_data();
+ for (uint16_t i = 0; i != size; i++) {
+ uint16_t idx = sel[i];
+ sel[new_size] = idx;
+ StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[idx]);
+ unsigned char flag = 0;
+ (_state->scalar_function)(
+ const_cast<vectorized::LikeSearchState*>(&_like_state),
+ StringRef(cell_value.ptr, cell_value.len), pattern, &flag);
+ new_size += _opposite ^ flag;
}
} else {
- if (_state->function_vec) {
- if (LIKELY(_like_state.search_string_sv.len > 0)) {
- auto* data_array =
- vectorized::check_and_get_column<
- vectorized::PredicateColumnType<TYPE_STRING>>(column)
- ->get_data()
- .data();
-
- (_state->function_vec)(
- const_cast<vectorized::LikeSearchState*>(&_like_state), pattern,
- data_array, sel, size, _opposite, &new_size);
- } else {
- for (uint16_t i = 0; i < size; i++) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- new_size += _opposite ^ true;
- }
- }
- } else {
- auto* data_array = vectorized::check_and_get_column<
- vectorized::PredicateColumnType<TYPE_STRING>>(column)
- ->get_data()
- .data();
-
- for (uint16_t i = 0; i != size; i++) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- unsigned char flag = 0;
- (_state->function)(const_cast<vectorized::LikeSearchState*>(&_like_state),
- data_array[idx], pattern, &flag);
- new_size += _opposite ^ flag;
- }
+ auto* str_col = vectorized::check_and_get_column<
+ vectorized::PredicateColumnType<TYPE_STRING>>(column);
+ vectorized::ColumnUInt8::Container res(size, 0);
+ (_state->predicate_like_function)(
+ const_cast<vectorized::LikeSearchState*>(&_like_state), *str_col, pattern,
+ res, sel, size);
+ for (uint16_t i = 0; i != size; i++) {
+ uint16_t idx = sel[i];
+ sel[new_size] = idx;
+ new_size += _opposite ^ res[i];
}
}
}
diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h
index 2f01a4db97..0bb53c8119 100644
--- a/be/src/olap/like_column_predicate.h
+++ b/be/src/olap/like_column_predicate.h
@@ -101,15 +101,15 @@ private:
StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[i]);
if constexpr (is_and) {
unsigned char flag = 0;
- (_state->function)(
+ (_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
- cell_value, pattern, &flag);
+ StringRef(cell_value.ptr, cell_value.len), pattern, &flag);
flags[i] &= _opposite ^ flag;
} else {
unsigned char flag = 0;
- (_state->function)(
+ (_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
- cell_value, pattern, &flag);
+ StringRef(cell_value.ptr, cell_value.len), pattern, &flag);
flags[i] = _opposite ^ flag;
}
}
@@ -118,55 +118,23 @@ private:
}
} else {
if (column.is_column_dictionary()) {
- if (_state->function_vec_dict) {
- if (LIKELY(_like_state.search_string_sv.len > 0)) {
- auto* nested_col_ptr = vectorized::check_and_get_column<
- vectorized::ColumnDictionary<vectorized::Int32>>(column);
- auto& data_array = nested_col_ptr->get_data();
- StringValue values[size];
- unsigned char temp_flags[size];
- for (uint16_t i = 0; i != size; i++) {
- values[i] = nested_col_ptr->get_shrink_value(data_array[i]);
- }
- (_state->function_vec_dict)(
- const_cast<vectorized::LikeSearchState*>(&_like_state), pattern,
- values, size, temp_flags);
- for (uint16_t i = 0; i < size; i++) {
- if constexpr (is_and) {
- flags[i] &= _opposite ^ temp_flags[i];
- } else {
- flags[i] = _opposite ^ temp_flags[i];
- }
- }
+ auto* nested_col_ptr = vectorized::check_and_get_column<
+ vectorized::ColumnDictionary<vectorized::Int32>>(column);
+ auto& data_array = nested_col_ptr->get_data();
+ for (uint16_t i = 0; i < size; i++) {
+ StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[i]);
+ if constexpr (is_and) {
+ unsigned char flag = 0;
+ (_state->scalar_function)(
+ const_cast<vectorized::LikeSearchState*>(&_like_state),
+ StringRef(cell_value.ptr, cell_value.len), pattern, &flag);
+ flags[i] &= _opposite ^ flag;
} else {
- for (uint16_t i = 0; i < size; i++) {
- if constexpr (is_and) {
- flags[i] &= _opposite ^ true;
- } else {
- flags[i] = _opposite ^ true;
- }
- }
- }
- } else {
- auto* nested_col_ptr = vectorized::check_and_get_column<
- vectorized::ColumnDictionary<vectorized::Int32>>(column);
- auto& data_array = nested_col_ptr->get_data();
- for (uint16_t i = 0; i < size; i++) {
- StringValue cell_value =
- nested_col_ptr->get_shrink_value(data_array[i]);
- if constexpr (is_and) {
- unsigned char flag = 0;
- (_state->function)(
- const_cast<vectorized::LikeSearchState*>(&_like_state),
- cell_value, pattern, &flag);
- flags[i] &= _opposite ^ flag;
- } else {
- unsigned char flag = 0;
- (_state->function)(
- const_cast<vectorized::LikeSearchState*>(&_like_state),
- cell_value, pattern, &flag);
- flags[i] = _opposite ^ flag;
- }
+ unsigned char flag = 0;
+ (_state->scalar_function)(
+ const_cast<vectorized::LikeSearchState*>(&_like_state),
+ StringRef(cell_value.ptr, cell_value.len), pattern, &flag);
+ flags[i] = _opposite ^ flag;
}
}
} else {
diff --git a/be/src/runtime/string_search.hpp b/be/src/runtime/string_search.hpp
index 463719f279..6565f516b4 100644
--- a/be/src/runtime/string_search.hpp
+++ b/be/src/runtime/string_search.hpp
@@ -39,6 +39,11 @@ public:
_vol_searcher.reset(new Volnitsky(pattern->ptr, pattern->len));
}
+ void set_pattern(const StringRef* pattern) {
+ _pattern = reinterpret_cast<const StringValue*>(pattern);
+ _vol_searcher.reset(new Volnitsky(pattern->data, pattern->size));
+ }
+
// search for this pattern in str.
// Returns the offset into str if the pattern exists
// Returns -1 if the pattern is not found
@@ -51,6 +56,15 @@ public:
}
}
+ int search(const StringRef& str) const {
+ auto it = search(const_cast<char*>(str.data), str.size);
+ if (it == str.data + str.size) {
+ return -1;
+ } else {
+ return it - str.data;
+ }
+ }
+
// search for this pattern in str.
// Returns the offset into str if the pattern exists
// Returns str+len if the pattern is not found
diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h
index c339c3abea..006274bd7c 100644
--- a/be/src/vec/common/string_ref.h
+++ b/be/src/vec/common/string_ref.h
@@ -46,37 +46,6 @@
#include <sse2neon.h>
#endif
-/// The thing to avoid creating strings to find substrings in the hash table.
-struct StringRef {
- const char* data = nullptr;
- size_t size = 0;
-
- StringRef(const char* data_, size_t size_) : data(data_), size(size_) {}
- StringRef(const unsigned char* data_, size_t size_)
- : data(reinterpret_cast<const char*>(data_)), size(size_) {}
- StringRef(const std::string& s) : data(s.data()), size(s.size()) {}
- StringRef() = default;
-
- std::string to_string() const { return std::string(data, size); }
- std::string_view to_string_view() const { return std::string_view(data, size); }
- doris::Slice to_slice() const { return doris::Slice(data, size); }
-
- // this is just for show, eg. print data to error log, to avoid print large string.
- std::string to_prefix(size_t length) const { return std::string(data, std::min(length, size)); }
-
- explicit operator std::string() const { return to_string(); }
-
- StringVal to_string_val() {
- return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(data)), size);
- }
-
- static StringRef from_string_val(StringVal sv) {
- return StringRef(reinterpret_cast<char*>(sv.ptr), sv.len);
- }
-};
-
-using StringRefs = std::vector<StringRef>;
-
#if defined(__SSE2__) || defined(__aarch64__)
/** Compare strings for equality.
@@ -163,6 +132,64 @@ inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) {
#endif
+/// The thing to avoid creating strings to find substrings in the hash table.
+struct StringRef {
+ const char* data = nullptr;
+ size_t size = 0;
+
+ StringRef(const char* data_, size_t size_) : data(data_), size(size_) {}
+ StringRef(const unsigned char* data_, size_t size_)
+ : data(reinterpret_cast<const char*>(data_)), size(size_) {}
+ StringRef(const std::string& s) : data(s.data()), size(s.size()) {}
+ StringRef() = default;
+
+ std::string to_string() const { return std::string(data, size); }
+ std::string_view to_string_view() const { return std::string_view(data, size); }
+ doris::Slice to_slice() const { return doris::Slice(data, size); }
+
+ // this is just for show, eg. print data to error log, to avoid print large string.
+ std::string to_prefix(size_t length) const { return std::string(data, std::min(length, size)); }
+
+ explicit operator std::string() const { return to_string(); }
+
+ StringRef substring(int start_pos, int new_len) const {
+ return StringRef(data + start_pos, (new_len < 0) ? (size - start_pos) : new_len);
+ }
+
+ StringVal to_string_val() {
+ return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>(data)), size);
+ }
+
+ static StringRef from_string_val(StringVal sv) {
+ return StringRef(reinterpret_cast<char*>(sv.ptr), sv.len);
+ }
+
+ bool start_with(StringRef& search_string) const {
+ DCHECK(size >= search_string.size);
+ if (search_string.size == 0) return true;
+
+#if defined(__SSE2__) || defined(__aarch64__)
+ return memequalSSE2Wide(data, search_string.data, search_string.size);
+#else
+ return 0 == memcmp(data, search_string.data, search_string.size);
+#endif
+ }
+ bool end_with(StringRef& search_string) const {
+ DCHECK(size >= search_string.size);
+ if (search_string.size == 0) return true;
+
+#if defined(__SSE2__) || defined(__aarch64__)
+ return memequalSSE2Wide(data + size - search_string.size, search_string.data,
+ search_string.size);
+#else
+ return 0 ==
+ memcmp(data + size - search_string.size, search_string.data, search_string.size);
+#endif
+ }
+};
+
+using StringRefs = std::vector<StringRef>;
+
inline bool operator==(StringRef lhs, StringRef rhs) {
if (lhs.size != rhs.size) return false;
diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index 1d156378e5..097352c360 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -63,66 +63,144 @@ Status LikeSearchState::clone(LikeSearchState& cloned) {
return Status::OK();
}
-Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const StringValue& val,
+Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const ColumnString& val,
const StringValue& pattern,
- unsigned char* result) {
- *result = (val.len >= state->search_string_sv.len) &&
- (state->search_string_sv == val.substring(0, state->search_string_sv.len));
+ ColumnUInt8::Container& result) {
+ auto sz = val.size();
+ for (size_t i = 0; i < sz; i++) {
+ const auto& str_ref = val.get_data_at(i);
+ result[i] = (str_ref.size >= state->search_string_sv.size) &&
+ str_ref.start_with(state->search_string_sv);
+ }
return Status::OK();
}
-Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result) {
- *result = (val.len >= state->search_string_sv.len) &&
- (state->search_string_sv ==
- val.substring(val.len - state->search_string_sv.len, state->search_string_sv.len));
+Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result) {
+ auto sz = val.size();
+ for (size_t i = 0; i < sz; i++) {
+ const auto& str_ref = val.get_data_at(i);
+ result[i] = (str_ref.size >= state->search_string_sv.size) &&
+ str_ref.end_with(state->search_string_sv);
+ }
return Status::OK();
}
-Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result) {
- *result = (val == state->search_string_sv);
+Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result) {
+ auto sz = val.size();
+ for (size_t i = 0; i < sz; i++) {
+ result[i] = (val.get_data_at(i) == state->search_string_sv);
+ }
return Status::OK();
}
-Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result) {
- if (state->search_string_sv.len == 0) {
- *result = true;
- return Status::OK();
+Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result) {
+ auto sz = val.size();
+ for (size_t i = 0; i < sz; i++) {
+ if (state->search_string_sv.size == 0) {
+ result[i] = true;
+ }
+ result[i] = state->substring_pattern.search(val.get_data_at(i)) != -1;
}
- *result = state->substring_pattern.search(&val) != -1;
return Status::OK();
}
-Status FunctionLikeBase::constant_substring_fn_vec(LikeSearchState* state,
- const StringValue& pattern,
- const StringValue* values, uint16_t* sel,
- uint16_t size, bool opposite,
- uint16_t* new_size) {
- uint16_t count = 0;
- for (uint16_t i = 0; i < size; i++) {
- uint16_t idx = sel[i];
- sel[count] = idx;
- count += opposite ^ (state->substring_pattern.search(&values[idx]) != -1);
+Status FunctionLikeBase::constant_starts_with_fn_predicate(
+ LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) {
+ auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+ for (size_t i = 0; i < sz; i++) {
+ result[i] = (data_ptr[sel[i]].size >= state->search_string_sv.size) &&
+ (state->search_string_sv ==
+ data_ptr[sel[i]].substring(0, state->search_string_sv.size));
+ }
+ return Status::OK();
+}
+
+Status FunctionLikeBase::constant_ends_with_fn_predicate(
+ LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) {
+ auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+ for (size_t i = 0; i < sz; i++) {
+ result[i] =
+ (data_ptr[sel[i]].size >= state->search_string_sv.size) &&
+ (state->search_string_sv ==
+ data_ptr[sel[i]].substring(data_ptr[sel[i]].size - state->search_string_sv.size,
+ state->search_string_sv.size));
+ }
+ return Status::OK();
+}
+
+Status FunctionLikeBase::constant_equals_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result, uint16_t* sel,
+ size_t sz) {
+ auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+ for (size_t i = 0; i < sz; i++) {
+ result[i] = (data_ptr[sel[i]] == state->search_string_sv);
+ }
+ return Status::OK();
+}
+
+Status FunctionLikeBase::constant_substring_fn_predicate(
+ LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) {
+ auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+ for (size_t i = 0; i < sz; i++) {
+ if (state->search_string_sv.size == 0) {
+ result[i] = true;
+ }
+ result[i] = state->substring_pattern.search(data_ptr[sel[i]]) != -1;
}
- *new_size = count;
return Status::OK();
}
-Status FunctionLikeBase::constant_substring_fn_vec_dict(LikeSearchState* state,
+Status FunctionLikeBase::constant_starts_with_fn_scalar(LikeSearchState* state,
+ const StringRef& val,
const StringValue& pattern,
- const StringValue* values, uint16_t size,
unsigned char* result) {
- for (uint16_t i = 0; i < size; i++) {
- result[i] = (state->substring_pattern.search(&values[i]) != -1);
+ *result = (val.size >= state->search_string_sv.size) &&
+ (state->search_string_sv == val.substring(0, state->search_string_sv.size));
+ return Status::OK();
+}
+
+Status FunctionLikeBase::constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern,
+ unsigned char* result) {
+ *result = (val.size >= state->search_string_sv.size) &&
+ (state->search_string_sv == val.substring(val.size - state->search_string_sv.size,
+ state->search_string_sv.size));
+ return Status::OK();
+}
+
+Status FunctionLikeBase::constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern,
+ unsigned char* result) {
+ *result = (val == state->search_string_sv);
+ return Status::OK();
+}
+
+Status FunctionLikeBase::constant_substring_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern,
+ unsigned char* result) {
+ if (state->search_string_sv.size == 0) {
+ *result = true;
+ return Status::OK();
}
+ *result = state->substring_pattern.search(val) != -1;
return Status::OK();
}
-Status FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result) {
- auto ret = hs_scan(state->hs_database.get(), val.ptr, val.len, 0, state->hs_scratch.get(),
+Status FunctionLikeBase::constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern,
+ unsigned char* result) {
+ auto ret = hs_scan(state->hs_database.get(), val.data, val.size, 0, state->hs_scratch.get(),
state->hs_match_handler, (void*)result);
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
@@ -131,16 +209,16 @@ Status FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const StringV
return Status::OK();
}
-Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result) {
- std::string re_pattern(pattern.ptr, pattern.len);
+Status FunctionLikeBase::regexp_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern, unsigned char* result) {
+ std::string_view re_pattern(pattern.ptr, pattern.len);
hs_database_t* database = nullptr;
hs_scratch_t* scratch = nullptr;
- RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch));
+ RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.data(), &database, &scratch));
- auto ret =
- hs_scan(database, val.ptr, val.len, 0, scratch, state->hs_match_handler, (void*)result);
+ auto ret = hs_scan(database, val.data, val.size, 0, scratch, state->hs_match_handler,
+ (void*)result);
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
}
@@ -151,6 +229,91 @@ Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const StringValue& va
return Status::OK();
}
+Status FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result) {
+ auto sz = val.size();
+ for (size_t i = 0; i < sz; i++) {
+ const auto& str_ref = val.get_data_at(i);
+ auto ret = hs_scan(state->hs_database.get(), str_ref.data, str_ref.size, 0,
+ state->hs_scratch.get(), state->hs_match_handler,
+ (void*)(result.data() + i));
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
+ }
+ }
+
+ return Status::OK();
+}
+
+Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern, ColumnUInt8::Container& result) {
+ std::string_view re_pattern(pattern.ptr, pattern.len);
+
+ hs_database_t* database = nullptr;
+ hs_scratch_t* scratch = nullptr;
+ RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.data(), &database, &scratch));
+
+ auto sz = val.size();
+ for (size_t i = 0; i < sz; i++) {
+ const auto& str_ref = val.get_data_at(i);
+ auto ret = hs_scan(database, str_ref.data, str_ref.size, 0, scratch,
+ state->hs_match_handler, (void*)(result.data() + i));
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
+ }
+ }
+
+ hs_free_scratch(scratch);
+ hs_free_database(database);
+
+ return Status::OK();
+}
+
+Status FunctionLikeBase::constant_regex_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result, uint16_t* sel,
+ size_t sz) {
+ auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+ for (size_t i = 0; i < sz; i++) {
+ auto ret = hs_scan(state->hs_database.get(), data_ptr[sel[i]].data, data_ptr[sel[i]].size,
+ 0, state->hs_scratch.get(), state->hs_match_handler,
+ (void*)(result.data() + i));
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
+ }
+ }
+
+ return Status::OK();
+}
+
+Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result, uint16_t* sel,
+ size_t sz) {
+ std::string_view re_pattern(pattern.ptr, pattern.len);
+
+ hs_database_t* database = nullptr;
+ hs_scratch_t* scratch = nullptr;
+ RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.data(), &database, &scratch));
+
+ auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+ for (size_t i = 0; i < sz; i++) {
+ auto ret = hs_scan(database, data_ptr[sel[i]].data, data_ptr[sel[i]].size, 0, scratch,
+ state->hs_match_handler, (void*)(result.data() + i));
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
+ }
+ }
+
+ hs_free_scratch(scratch);
+ hs_free_database(database);
+
+ return Status::OK();
+}
+
// hyperscan compile expression to database and allocate scratch space
Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expression,
hs_database_t** database, hs_scratch_t** scratch) {
@@ -196,22 +359,24 @@ Status FunctionLikeBase::execute_impl(FunctionContext* context, Block& block,
context->get_function_state(FunctionContext::THREAD_LOCAL));
// for constant_substring_fn, use long run length search for performance
if (constant_substring_fn ==
- *(state->function.target<doris::Status (*)(LikeSearchState * state, const StringValue&,
- const StringValue&, unsigned char*)>())) {
+ *(state->function
+ .target<doris::Status (*)(LikeSearchState * state, const ColumnString&,
+ const StringValue&, ColumnUInt8::Container&)>())) {
RETURN_IF_ERROR(execute_substring(values->get_chars(), values->get_offsets(), vec_res,
- state->function, &state->search_state));
+ &state->search_state));
} else {
const auto pattern_col = block.get_by_position(arguments[1]).column;
- if (const auto* patterns = check_and_get_column<ColumnString>(pattern_col.get())) {
- RETURN_IF_ERROR(vector_vector(values->get_chars(), values->get_offsets(),
- patterns->get_chars(), patterns->get_offsets(), vec_res,
- state->function, &state->search_state));
+ if (const auto* str_patterns = check_and_get_column<ColumnString>(pattern_col.get())) {
+ DCHECK_EQ(str_patterns->size(), 1);
+ const auto& pattern_val = str_patterns->get_data_at(0);
+ RETURN_IF_ERROR(vector_const(*values, &pattern_val, vec_res, state->function,
+ &state->search_state));
} else if (const auto* const_patterns =
check_and_get_column<ColumnConst>(pattern_col.get())) {
const auto& pattern_val = const_patterns->get_data_at(0);
- RETURN_IF_ERROR(vector_const(values->get_chars(), values->get_offsets(), &pattern_val,
- vec_res, state->function, &state->search_state));
+ RETURN_IF_ERROR(vector_const(*values, &pattern_val, vec_res, state->function,
+ &state->search_state));
} else {
return Status::InternalError("Not supported input arguments types");
}
@@ -232,7 +397,7 @@ Status FunctionLikeBase::close(FunctionContext* context,
Status FunctionLikeBase::execute_substring(const ColumnString::Chars& values,
const ColumnString::Offsets& value_offsets,
- ColumnUInt8::Container& result, const LikeFn& function,
+ ColumnUInt8::Container& result,
LikeSearchState* search_state) {
// treat continuous multi string data as a long string data
const UInt8* begin = values.data();
@@ -266,48 +431,40 @@ Status FunctionLikeBase::execute_substring(const ColumnString::Chars& values,
return Status::OK();
}
-Status FunctionLikeBase::vector_const(const ColumnString::Chars& values,
- const ColumnString::Offsets& value_offsets,
- const StringRef* pattern_val, ColumnUInt8::Container& result,
- const LikeFn& function, LikeSearchState* search_state) {
- const auto size = value_offsets.size();
-
- for (int i = 0; i < size; ++i) {
- char* val_raw_str = (char*)(&values[value_offsets[i - 1]]);
- UInt32 val_str_size = value_offsets[i] - value_offsets[i - 1];
-
- RETURN_IF_ERROR((function)(search_state, StringValue(val_raw_str, val_str_size),
- *reinterpret_cast<const StringValue*>(pattern_val), &result[i]));
- }
+Status FunctionLikeBase::vector_const(const ColumnString& values, const StringRef* pattern_val,
+ ColumnUInt8::Container& result, const LikeFn& function,
+ LikeSearchState* search_state) {
+ RETURN_IF_ERROR((function)(search_state, values,
+ *reinterpret_cast<const StringValue*>(pattern_val), result));
return Status::OK();
}
-Status FunctionLikeBase::vector_vector(const ColumnString::Chars& values,
- const ColumnString::Offsets& value_offsets,
- const ColumnString::Chars& patterns,
- const ColumnString::Offsets& pattern_offsets,
- ColumnUInt8::Container& result, const LikeFn& function,
- LikeSearchState* search_state) {
- const auto size = value_offsets.size();
+Status FunctionLike::like_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern, ColumnUInt8::Container& result) {
+ std::string re_pattern;
+ convert_like_pattern(state, std::string(pattern.ptr, pattern.len), &re_pattern);
- for (int i = 0; i < size; ++i) {
- char* val_raw_str = (char*)(&values[value_offsets[i - 1]]);
- UInt32 val_str_size = value_offsets[i] - value_offsets[i - 1];
+ return regexp_fn(state, val, {re_pattern.c_str(), (int)re_pattern.size()}, result);
+}
- char* pattern_raw_str = (char*)(&patterns[pattern_offsets[i - 1]]);
- UInt32 patter_str_size = pattern_offsets[i] - pattern_offsets[i - 1];
- RETURN_IF_ERROR((function)(search_state, StringValue(val_raw_str, val_str_size),
- StringValue(pattern_raw_str, patter_str_size), &result[i]));
- }
- return Status::OK();
+Status FunctionLike::like_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern, ColumnUInt8::Container& result,
+ uint16_t* sel, size_t sz) {
+ std::string re_pattern;
+ convert_like_pattern(state, std::string(pattern.ptr, pattern.len), &re_pattern);
+
+ return regexp_fn_predicate(state, val, {re_pattern.c_str(), (int)re_pattern.size()}, result,
+ sel, sz);
}
-Status FunctionLike::like_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result) {
+Status FunctionLike::like_fn_scalar(LikeSearchState* state, const StringValue& val,
+ const StringValue& pattern, unsigned char* result) {
std::string re_pattern;
convert_like_pattern(state, std::string(pattern.ptr, pattern.len), &re_pattern);
- return regexp_fn(state, val, {re_pattern.c_str(), (int)re_pattern.size()}, result);
+ return regexp_fn_scalar(state, StringRef(val.ptr, val.len),
+ {re_pattern.c_str(), (int)re_pattern.size()}, result);
}
void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::string& pattern,
@@ -373,6 +530,8 @@ Status FunctionLike::prepare(FunctionContext* context, FunctionContext::Function
auto* state = new LikeState();
context->set_function_state(scope, state);
state->function = like_fn;
+ state->predicate_like_function = like_fn_predicate;
+ state->scalar_function = like_fn_scalar;
if (context->is_col_constant(1)) {
const auto pattern_col = context->get_constant_col(1)->column_ptr;
const auto& pattern = pattern_col->get_data_at(0);
@@ -384,20 +543,26 @@ Status FunctionLike::prepare(FunctionContext* context, FunctionContext::Function
remove_escape_character(&search_string);
state->search_state.set_search_string(search_string);
state->function = constant_equals_fn;
+ state->predicate_like_function = constant_equals_fn_predicate;
+ state->scalar_function = constant_equals_fn_scalar;
} else if (RE2::FullMatch(pattern_str, LIKE_STARTS_WITH_RE, &search_string)) {
remove_escape_character(&search_string);
state->search_state.set_search_string(search_string);
state->function = constant_starts_with_fn;
+ state->predicate_like_function = constant_starts_with_fn_predicate;
+ state->scalar_function = constant_starts_with_fn_scalar;
} else if (RE2::FullMatch(pattern_str, LIKE_ENDS_WITH_RE, &search_string)) {
remove_escape_character(&search_string);
state->search_state.set_search_string(search_string);
state->function = constant_ends_with_fn;
+ state->predicate_like_function = constant_ends_with_fn_predicate;
+ state->scalar_function = constant_ends_with_fn_scalar;
} else if (RE2::FullMatch(pattern_str, LIKE_SUBSTRING_RE, &search_string)) {
remove_escape_character(&search_string);
state->search_state.set_search_string(search_string);
state->function = constant_substring_fn;
- state->function_vec = constant_substring_fn_vec;
- state->function_vec_dict = constant_substring_fn_vec_dict;
+ state->predicate_like_function = constant_substring_fn_predicate;
+ state->scalar_function = constant_substring_fn_scalar;
} else {
std::string re_pattern;
convert_like_pattern(&state->search_state, pattern_str, &re_pattern);
@@ -410,6 +575,8 @@ Status FunctionLike::prepare(FunctionContext* context, FunctionContext::Function
state->search_state.hs_scratch.reset(scratch);
state->function = constant_regex_fn;
+ state->predicate_like_function = constant_regex_fn_predicate;
+ state->scalar_function = constant_regex_fn_scalar;
}
}
return Status::OK();
@@ -423,6 +590,8 @@ Status FunctionRegexp::prepare(FunctionContext* context,
auto* state = new LikeState();
context->set_function_state(scope, state);
state->function = regexp_fn;
+ state->predicate_like_function = regexp_fn_predicate;
+ state->scalar_function = regexp_fn_scalar;
if (context->is_col_constant(1)) {
const auto pattern_col = context->get_constant_col(1)->column_ptr;
const auto& pattern = pattern_col->get_data_at(0);
@@ -432,17 +601,23 @@ Status FunctionRegexp::prepare(FunctionContext* context,
if (RE2::FullMatch(pattern_str, EQUALS_RE, &search_string)) {
state->search_state.set_search_string(search_string);
state->function = constant_equals_fn;
+ state->predicate_like_function = constant_equals_fn_predicate;
+ state->scalar_function = constant_equals_fn_scalar;
} else if (RE2::FullMatch(pattern_str, STARTS_WITH_RE, &search_string)) {
state->search_state.set_search_string(search_string);
state->function = constant_starts_with_fn;
+ state->predicate_like_function = constant_starts_with_fn_predicate;
+ state->scalar_function = constant_starts_with_fn_scalar;
} else if (RE2::FullMatch(pattern_str, ENDS_WITH_RE, &search_string)) {
state->search_state.set_search_string(search_string);
state->function = constant_ends_with_fn;
+ state->predicate_like_function = constant_ends_with_fn_predicate;
+ state->scalar_function = constant_ends_with_fn_scalar;
} else if (RE2::FullMatch(pattern_str, SUBSTRING_RE, &search_string)) {
state->search_state.set_search_string(search_string);
state->function = constant_substring_fn;
- state->function_vec = constant_substring_fn_vec;
- state->function_vec_dict = constant_substring_fn_vec_dict;
+ state->predicate_like_function = constant_substring_fn_predicate;
+ state->scalar_function = constant_substring_fn_scalar;
} else {
hs_database_t* database = nullptr;
hs_scratch_t* scratch = nullptr;
@@ -452,6 +627,8 @@ Status FunctionRegexp::prepare(FunctionContext* context,
state->search_state.hs_scratch.reset(scratch);
state->function = constant_regex_fn;
+ state->predicate_like_function = constant_regex_fn_predicate;
+ state->scalar_function = constant_regex_fn_scalar;
}
}
return Status::OK();
diff --git a/be/src/vec/functions/like.h b/be/src/vec/functions/like.h
index 98458dbd5b..b44831a93d 100644
--- a/be/src/vec/functions/like.h
+++ b/be/src/vec/functions/like.h
@@ -27,6 +27,7 @@
#include "vec/columns/column_const.h"
#include "vec/columns/column_set.h"
#include "vec/columns/columns_number.h"
+#include "vec/columns/predicate_column.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/exprs/vexpr.h"
@@ -50,7 +51,7 @@ struct LikeSearchState {
/// constant string or has a constant string at the beginning or end of the pattern.
/// This will be set in order to check for that pattern in the corresponding part of
/// the string.
- doris::StringValue search_string_sv;
+ StringRef search_string_sv;
/// Used for LIKE predicates if the pattern is a constant argument and has a constant
/// string in the middle of it. This will be used in order to check for the substring
@@ -91,26 +92,27 @@ struct LikeSearchState {
void set_search_string(const std::string& search_string_arg) {
search_string = search_string_arg;
- search_string_sv = StringValue(search_string);
+ search_string_sv = StringRef(search_string);
substring_pattern.set_pattern(&search_string_sv);
}
};
-using LikeFn = std::function<doris::Status(LikeSearchState*, const StringValue&, const StringValue&,
- unsigned char*)>;
+using LikeFn = std::function<doris::Status(LikeSearchState*, const ColumnString&,
+ const StringValue&, ColumnUInt8::Container&)>;
-using LikeFnVec =
- std::function<doris::Status(LikeSearchState*, const StringValue&, const StringValue*,
- uint16_t*, uint16_t, bool, uint16_t*)>;
+using LikePredicateFn = std::function<doris::Status(
+ LikeSearchState*, const PredicateColumnType<TYPE_STRING>&, const StringValue&,
+ ColumnUInt8::Container&, uint16_t* sel, size_t sz)>;
-using LikeFnVecDict = std::function<doris::Status(LikeSearchState*, const StringValue&,
- const StringValue*, uint16_t, unsigned char*)>;
+using ScalarLikeFn = std::function<doris::Status(LikeSearchState*, const StringRef&,
+ const StringValue&, unsigned char*)>;
struct LikeState {
LikeSearchState search_state;
LikeFn function;
- LikeFnVec function_vec;
- LikeFnVecDict function_vec_dict;
+ // The two functions below are used only for predicates.
+ LikePredicateFn predicate_like_function;
+ ScalarLikeFn scalar_function;
};
class FunctionLikeBase : public IFunction {
@@ -129,48 +131,86 @@ public:
Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
protected:
- Status vector_vector(const ColumnString::Chars& values,
- const ColumnString::Offsets& value_offsets,
- const ColumnString::Chars& patterns,
- const ColumnString::Offsets& pattern_offsets,
- ColumnUInt8::Container& result, const LikeFn& function,
- LikeSearchState* search_state);
-
- Status vector_const(const ColumnString::Chars& values,
- const ColumnString::Offsets& value_offsets, const StringRef* pattern_val,
+ Status vector_const(const ColumnString& values, const StringRef* pattern_val,
ColumnUInt8::Container& result, const LikeFn& function,
LikeSearchState* search_state);
Status execute_substring(const ColumnString::Chars& values,
const ColumnString::Offsets& value_offsets,
- ColumnUInt8::Container& result, const LikeFn& function,
- LikeSearchState* search_state);
+ ColumnUInt8::Container& result, LikeSearchState* search_state);
- static Status constant_starts_with_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result);
+ static Status constant_starts_with_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result);
- static Status constant_ends_with_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result);
+ static Status constant_ends_with_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern, ColumnUInt8::Container& result);
- static Status constant_equals_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result);
+ static Status constant_equals_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern, ColumnUInt8::Container& result);
- static Status constant_substring_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result);
+ static Status constant_substring_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern, ColumnUInt8::Container& result);
- static Status constant_substring_fn_vec(LikeSearchState* state, const StringValue& pattern,
- const StringValue* values, uint16_t* sel, uint16_t size,
- bool opposite, uint16_t* new_size);
+ static Status constant_regex_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern, ColumnUInt8::Container& result);
- static Status constant_substring_fn_vec_dict(LikeSearchState* state, const StringValue& pattern,
- const StringValue* values, uint16_t size,
- unsigned char* result);
+ static Status regexp_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern, ColumnUInt8::Container& result);
- static Status constant_regex_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result);
+ // The functions below are used only for predicates.
+ static Status constant_regex_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result, uint16_t* sel,
+ size_t sz);
- static Status regexp_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result);
+ static Status regexp_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern, ColumnUInt8::Container& result,
+ uint16_t* sel, size_t sz);
+
+ static Status constant_starts_with_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result, uint16_t* sel,
+ size_t sz);
+
+ static Status constant_ends_with_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result, uint16_t* sel,
+ size_t sz);
+
+ static Status constant_equals_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result, uint16_t* sel,
+ size_t sz);
+
+ static Status constant_substring_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern,
+ ColumnUInt8::Container& result, uint16_t* sel,
+ size_t sz);
+
+ static Status constant_starts_with_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern, unsigned char* result);
+
+ static Status constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern, unsigned char* result);
+
+ static Status constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern, unsigned char* result);
+
+ static Status constant_substring_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern, unsigned char* result);
+
+ static Status constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern, unsigned char* result);
+
+ static Status regexp_fn_scalar(LikeSearchState* state, const StringRef& val,
+ const StringValue& pattern, unsigned char* result);
// hyperscan compile expression to database and allocate scratch space
static Status hs_prepare(FunctionContext* context, const char* expression,
@@ -190,8 +230,16 @@ public:
friend struct LikeSearchState;
private:
- static Status like_fn(LikeSearchState* state, const StringValue& val,
- const StringValue& pattern, unsigned char* result);
+ static Status like_fn(LikeSearchState* state, const ColumnString& val,
+ const StringValue& pattern, ColumnUInt8::Container& result);
+
+ static Status like_fn_predicate(LikeSearchState* state,
+ const PredicateColumnType<TYPE_STRING>& val,
+ const StringValue& pattern, ColumnUInt8::Container& result,
+ uint16_t* sel, size_t sz);
+
+ static Status like_fn_scalar(LikeSearchState* state, const StringValue& val,
+ const StringValue& pattern, unsigned char* result);
static void convert_like_pattern(LikeSearchState* state, const std::string& pattern,
std::string* re_pattern);
diff --git a/be/test/vec/function/function_like_test.cpp b/be/test/vec/function/function_like_test.cpp
index e3c63fd294..2c8299e274 100644
--- a/be/test/vec/function/function_like_test.cpp
+++ b/be/test/vec/function/function_like_test.cpp
@@ -60,10 +60,6 @@ TEST(FunctionLikeTest, like) {
check_function<DataTypeUInt8, true>(func_name, const_pattern_input_types,
const_pattern_dataset);
}
-
- // pattern is not constant value
- InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
- check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
}
TEST(FunctionLikeTest, regexp) {
@@ -100,10 +96,6 @@ TEST(FunctionLikeTest, regexp) {
check_function<DataTypeUInt8, true>(func_name, const_pattern_input_types,
const_pattern_dataset);
}
-
- // pattern is not constant value
- InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
- check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
}
TEST(FunctionLikeTest, regexp_extract) {
@@ -143,10 +135,6 @@ TEST(FunctionLikeTest, regexp_extract) {
check_function<DataTypeString, true>(func_name, const_pattern_input_types,
const_pattern_dataset);
}
-
- // pattern is not constant value
- InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::Int64};
- check_function<DataTypeString, true>(func_name, input_types, data_set);
}
TEST(FunctionLikeTest, regexp_replace) {
@@ -177,10 +165,6 @@ TEST(FunctionLikeTest, regexp_replace) {
check_function<DataTypeString, true>(func_name, const_pattern_input_types,
const_pattern_dataset);
}
-
- // pattern is not constant value
- InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String};
- check_function<DataTypeString, true>(func_name, input_types, data_set);
}
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org