You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/10/14 10:28:28 UTC

[GitHub] [doris] HappenLee commented on a diff in pull request #13314: [Improvement](like) Change `like` function to batch call

HappenLee commented on code in PR #13314:
URL: https://github.com/apache/doris/pull/13314#discussion_r995585935


##########
be/src/vec/functions/like.cpp:
##########
@@ -63,35 +63,171 @@ Status LikeSearchState::clone(LikeSearchState& cloned) {
     return Status::OK();
 }
 
-Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const StringValue& val,
+Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const ColumnString& val,
                                                  const StringValue& pattern,
-                                                 unsigned char* result) {
-    *result = (val.len >= state->search_string_sv.len) &&
-              (state->search_string_sv == val.substring(0, state->search_string_sv.len));
+                                                 ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        const auto& str_ref = val.get_data_at(i);
+        result[i] = (str_ref.size >= state->search_string_sv.size) &&
+                    str_ref.start_with(state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const ColumnString& val,
+                                               const StringValue& pattern,
+                                               ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        const auto& str_ref = val.get_data_at(i);
+        result[i] = (str_ref.size >= state->search_string_sv.size) &&
+                    str_ref.end_with(state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const ColumnString& val,
+                                            const StringValue& pattern,
+                                            ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        result[i] = (val.get_data_at(i) == state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const ColumnString& val,
+                                               const StringValue& pattern,
+                                               ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        if (state->search_string_sv.size == 0) {
+            result[i] = true;

Review Comment:
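   Why does this set only one result element and then return? That seems wrong: when the search string is empty, the function returns on the first iteration and the remaining result elements are never set.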



##########
be/src/vec/functions/like.cpp:
##########
@@ -63,35 +63,171 @@ Status LikeSearchState::clone(LikeSearchState& cloned) {
     return Status::OK();
 }
 
-Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const StringValue& val,
+Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const ColumnString& val,
                                                  const StringValue& pattern,
-                                                 unsigned char* result) {
-    *result = (val.len >= state->search_string_sv.len) &&
-              (state->search_string_sv == val.substring(0, state->search_string_sv.len));
+                                                 ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        const auto& str_ref = val.get_data_at(i);
+        result[i] = (str_ref.size >= state->search_string_sv.size) &&
+                    str_ref.start_with(state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const ColumnString& val,
+                                               const StringValue& pattern,
+                                               ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        const auto& str_ref = val.get_data_at(i);
+        result[i] = (str_ref.size >= state->search_string_sv.size) &&
+                    str_ref.end_with(state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const ColumnString& val,
+                                            const StringValue& pattern,
+                                            ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        result[i] = (val.get_data_at(i) == state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const ColumnString& val,
+                                               const StringValue& pattern,
+                                               ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        if (state->search_string_sv.size == 0) {
+            result[i] = true;
+            return Status::OK();
+        }
+        result[i] = state->substring_pattern.search(val.get_data_at(i)) != -1;
+    }
     return Status::OK();
 }
 
-Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const StringValue& val,
-                                               const StringValue& pattern, unsigned char* result) {
-    *result = (val.len >= state->search_string_sv.len) &&
-              (state->search_string_sv ==
-               val.substring(val.len - state->search_string_sv.len, state->search_string_sv.len));
+Status FunctionLikeBase::constant_starts_with_fn_predicate(
+        LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
+        const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) {
+    auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+    for (size_t i = 0; i < sz; i++) {
+        result[i] = (data_ptr[sel[i]].size >= state->search_string_sv.size) &&
+                    (state->search_string_sv ==
+                     data_ptr[sel[i]].substring(0, state->search_string_sv.size));
+    }
     return Status::OK();
 }
 
-Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const StringValue& val,
-                                            const StringValue& pattern, unsigned char* result) {
+Status FunctionLikeBase::constant_ends_with_fn_predicate(
+        LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
+        const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) {
+    auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+    for (size_t i = 0; i < sz; i++) {
+        result[i] =
+                (data_ptr[sel[i]].size >= state->search_string_sv.size) &&
+                (state->search_string_sv ==
+                 data_ptr[sel[i]].substring(data_ptr[sel[i]].size - state->search_string_sv.size,
+                                            state->search_string_sv.size));
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_equals_fn_predicate(LikeSearchState* state,
+                                                      const PredicateColumnType<TYPE_STRING>& val,
+                                                      const StringValue& pattern,
+                                                      ColumnUInt8::Container& result, uint16_t* sel,
+                                                      size_t sz) {
+    auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+    for (size_t i = 0; i < sz; i++) {
+        result[i] = (data_ptr[sel[i]] == state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_substring_fn_predicate(
+        LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
+        const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) {
+    auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+    for (size_t i = 0; i < sz; i++) {
+        if (state->search_string_sv.size == 0) {
+            result[i] = true;

Review Comment:
   Same as above.



##########
be/src/vec/functions/like.h:
##########
@@ -166,11 +169,64 @@ class FunctionLikeBase : public IFunction {
                                                  const StringValue* values, uint16_t size,
                                                  unsigned char* result);
 
-    static Status constant_regex_fn(LikeSearchState* state, const StringValue& val,
-                                    const StringValue& pattern, unsigned char* result);
+    static Status constant_regex_fn(LikeSearchState* state, const ColumnString& val,
+                                    const StringValue& pattern, ColumnUInt8::Container& result);
+
+    static Status regexp_fn(LikeSearchState* state, const ColumnString& val,
+                            const StringValue& pattern, ColumnUInt8::Container& result);
+

Review Comment:
   Could you add a comment noting that the functions named `fn_predicate` are only executed in the storage engine, or choose a better name?
   



##########
be/src/vec/functions/like.cpp:
##########
@@ -63,35 +63,171 @@ Status LikeSearchState::clone(LikeSearchState& cloned) {
     return Status::OK();
 }
 
-Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const StringValue& val,
+Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const ColumnString& val,
                                                  const StringValue& pattern,
-                                                 unsigned char* result) {
-    *result = (val.len >= state->search_string_sv.len) &&
-              (state->search_string_sv == val.substring(0, state->search_string_sv.len));
+                                                 ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        const auto& str_ref = val.get_data_at(i);
+        result[i] = (str_ref.size >= state->search_string_sv.size) &&
+                    str_ref.start_with(state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const ColumnString& val,
+                                               const StringValue& pattern,
+                                               ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        const auto& str_ref = val.get_data_at(i);
+        result[i] = (str_ref.size >= state->search_string_sv.size) &&
+                    str_ref.end_with(state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const ColumnString& val,
+                                            const StringValue& pattern,
+                                            ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        result[i] = (val.get_data_at(i) == state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const ColumnString& val,
+                                               const StringValue& pattern,
+                                               ColumnUInt8::Container& result) {
+    auto sz = val.size();
+    for (size_t i = 0; i < sz; i++) {
+        if (state->search_string_sv.size == 0) {
+            result[i] = true;
+            return Status::OK();
+        }
+        result[i] = state->substring_pattern.search(val.get_data_at(i)) != -1;
+    }
     return Status::OK();
 }
 
-Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const StringValue& val,
-                                               const StringValue& pattern, unsigned char* result) {
-    *result = (val.len >= state->search_string_sv.len) &&
-              (state->search_string_sv ==
-               val.substring(val.len - state->search_string_sv.len, state->search_string_sv.len));
+Status FunctionLikeBase::constant_starts_with_fn_predicate(
+        LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
+        const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) {
+    auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+    for (size_t i = 0; i < sz; i++) {
+        result[i] = (data_ptr[sel[i]].size >= state->search_string_sv.size) &&
+                    (state->search_string_sv ==
+                     data_ptr[sel[i]].substring(0, state->search_string_sv.size));
+    }
     return Status::OK();
 }
 
-Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const StringValue& val,
-                                            const StringValue& pattern, unsigned char* result) {
+Status FunctionLikeBase::constant_ends_with_fn_predicate(
+        LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
+        const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) {
+    auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+    for (size_t i = 0; i < sz; i++) {
+        result[i] =
+                (data_ptr[sel[i]].size >= state->search_string_sv.size) &&
+                (state->search_string_sv ==
+                 data_ptr[sel[i]].substring(data_ptr[sel[i]].size - state->search_string_sv.size,
+                                            state->search_string_sv.size));
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_equals_fn_predicate(LikeSearchState* state,
+                                                      const PredicateColumnType<TYPE_STRING>& val,
+                                                      const StringValue& pattern,
+                                                      ColumnUInt8::Container& result, uint16_t* sel,
+                                                      size_t sz) {
+    auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+    for (size_t i = 0; i < sz; i++) {
+        result[i] = (data_ptr[sel[i]] == state->search_string_sv);
+    }
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_substring_fn_predicate(
+        LikeSearchState* state, const PredicateColumnType<TYPE_STRING>& val,
+        const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) {
+    auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
+    for (size_t i = 0; i < sz; i++) {
+        if (state->search_string_sv.size == 0) {
+            result[i] = true;

Review Comment:
   Same as above.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org