You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/04/19 13:34:35 UTC

[doris] 09/36: [BugFix](functions) fix multi_search_all_positions #18682

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0-alpha
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b883b2019a4c8dda2f58149a991bea66522bd19e
Author: zclllyybb <zh...@selectdb.com>
AuthorDate: Mon Apr 17 08:32:57 2023 +0800

    [BugFix](functions) fix multi_search_all_positions #18682
---
 be/src/vec/common/string_searcher.h                |  66 ----------
 .../functions/functions_multi_string_position.cpp  | 140 +++++++++++----------
 .../search/multi_search_all_positions.md           |  16 +--
 .../search/multi_search_all_positions.md           |  16 +--
 .../test_multi_string_position.out                 |  43 ++++---
 .../test_multi_string_position.groovy              |  47 +++++--
 6 files changed, 149 insertions(+), 179 deletions(-)

diff --git a/be/src/vec/common/string_searcher.h b/be/src/vec/common/string_searcher.h
index 97c5570a81..af76f2100d 100644
--- a/be/src/vec/common/string_searcher.h
+++ b/be/src/vec/common/string_searcher.h
@@ -416,70 +416,4 @@ struct LibCASCIICaseInsensitiveStringSearcher : public StringSearcherBase {
         return search(haystack, haystack + haystack_size);
     }
 };
-
-template <typename StringSearcher>
-class MultiStringSearcherBase {
-private:
-    /// needles
-    const std::vector<StringRef>& needles;
-    /// searchers
-    std::vector<StringSearcher> searchers;
-    /// last index of needles that was not processed
-    size_t last;
-
-public:
-    explicit MultiStringSearcherBase(const std::vector<StringRef>& needles_)
-            : needles {needles_}, last {0} {
-        searchers.reserve(needles.size());
-
-        size_t size = needles.size();
-        for (int i = 0; i < size; ++i) {
-            const char* cur_needle_data = needles[i].data;
-            const size_t cur_needle_size = needles[i].size;
-
-            searchers.emplace_back(cur_needle_data, cur_needle_size);
-        }
-    }
-
-    /**
-     * while (hasMoreToSearch())
-     * {
-     *     search inside the haystack with the known needles
-     * }
-     */
-    bool hasMoreToSearch() {
-        if (last >= needles.size()) {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool searchOne(const uint8_t* haystack, const uint8_t* haystack_end) {
-        const size_t size = needles.size();
-        if (last >= size) {
-            return false;
-        }
-
-        if (searchers[++last].search(haystack, haystack_end) != haystack_end) {
-            return true;
-        }
-        return false;
-    }
-
-    template <typename CountCharsCallback, typename AnsType>
-    void searchOneAll(const uint8_t* haystack, const uint8_t* haystack_end, AnsType* answer,
-                      const CountCharsCallback& count_chars) {
-        const size_t size = needles.size();
-        for (; last < size; ++last) {
-            const uint8_t* ptr = searchers[last].search(haystack, haystack_end);
-            if (ptr != haystack_end) {
-                answer[last] = count_chars(haystack, ptr);
-            }
-        }
-    }
-};
-
-using MultiStringSearcher = MultiStringSearcherBase<ASCIICaseSensitiveStringSearcher>;
-
 } // namespace doris
diff --git a/be/src/vec/functions/functions_multi_string_position.cpp b/be/src/vec/functions/functions_multi_string_position.cpp
index a3c3420acb..5b8dd13b9b 100644
--- a/be/src/vec/functions/functions_multi_string_position.cpp
+++ b/be/src/vec/functions/functions_multi_string_position.cpp
@@ -18,6 +18,9 @@
 // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsMultiStringPosition.h
 // and modified by Doris
 
+#include <cstdint>
+#include <iterator>
+
 #include "function.h"
 #include "function_helpers.h"
 #include "vec/columns/column_array.h"
@@ -103,48 +106,53 @@ public:
     }
 };
 
-template <typename Impl>
 struct FunctionMultiSearchAllPositionsImpl {
+public:
     using ResultType = Int32;
-
+    using SingleSearcher = ASCIICaseSensitiveStringSearcher;
     static constexpr auto name = "multi_search_all_positions";
 
     static Status vector_constant(const ColumnString::Chars& haystack_data,
                                   const ColumnString::Offsets& haystack_offsets,
                                   const Array& needles_arr, PaddedPODArray<Int32>& vec_res,
                                   PaddedPODArray<UInt64>& offsets_res) {
-        if (needles_arr.size() > std::numeric_limits<UInt8>::max())
+        if (needles_arr.size() > std::numeric_limits<UInt8>::max()) {
             return Status::InvalidArgument(
                     "number of arguments for function {} doesn't match: "
                     "passed {}, should be at most 255",
                     name, needles_arr.size());
+        }
 
-        std::vector<StringRef> needles;
-        needles.reserve(needles_arr.size());
-        for (const auto& needle : needles_arr) needles.emplace_back(needle.get<StringRef>());
-
-        auto res_callback = [](const UInt8* start, const UInt8* end) -> Int32 {
-            return 1 + Impl::count_chars(reinterpret_cast<const char*>(start),
-                                         reinterpret_cast<const char*>(end));
-        };
-
-        auto searcher = Impl::create_multi_searcher(needles);
+        const size_t needles_size = needles_arr.size();
+        std::vector<SingleSearcher> searchers;
+        searchers.reserve(needles_size);
+        for (const auto& needle : needles_arr) {
+            searchers.emplace_back(needle.get<StringRef>().data, needle.get<StringRef>().size);
+        }
 
         const size_t haystack_size = haystack_offsets.size();
-        const size_t needles_size = needles.size();
-
-        vec_res.resize(haystack_size * needles.size());
+        vec_res.resize(haystack_size * needles_size);
         offsets_res.resize(haystack_size);
 
         std::fill(vec_res.begin(), vec_res.end(), 0);
 
-        while (searcher.hasMoreToSearch()) {
+        // We build the answer one needle (i.e. one slot of every result row) at a time, not one row at a time.
+        // TODO: check whether this loop order is best. With large data, the strided writes may span more memory than fits in the L2 cache.
+        for (size_t ans_slot_in_row = 0; ans_slot_in_row < searchers.size(); ans_slot_in_row++) {
+            // ans_slot_in_row is the index of the answer slot within one result row (array).
+            auto& searcher = searchers[ans_slot_in_row];
             size_t prev_haystack_offset = 0;
-            for (size_t j = 0, from = 0; j < haystack_size; ++j, from += needles_size) {
+
+            for (size_t haystack_index = 0, res_index = ans_slot_in_row;
+                 haystack_index < haystack_size; ++haystack_index, res_index += needles_size) {
                 const auto* haystack = &haystack_data[prev_haystack_offset];
-                const auto* haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset;
-                searcher.searchOneAll(haystack, haystack_end, &vec_res[from], res_callback);
-                prev_haystack_offset = haystack_offsets[j];
+                const auto* haystack_end =
+                        haystack - prev_haystack_offset + haystack_offsets[haystack_index];
+
+                auto ans_now = searcher.search(haystack, haystack_end);
+                vec_res[res_index] =
+                        ans_now >= haystack_end ? 0 : std::distance(haystack, ans_now) + 1;
+                prev_haystack_offset = haystack_offsets[haystack_index];
             }
         }
 
@@ -166,72 +174,72 @@ struct FunctionMultiSearchAllPositionsImpl {
         size_t prev_haystack_offset = 0;
         size_t prev_needles_offset = 0;
 
-        auto res_callback = [](const UInt8* start, const UInt8* end) -> Int32 {
-            return 1 + Impl::count_chars(reinterpret_cast<const char*>(start),
-                                         reinterpret_cast<const char*>(end));
-        };
-
-        offsets_res.reserve(haystack_offsets.size());
+        offsets_res.reserve(haystack_data.size());
+        uint64_t offset_now = 0;
 
         auto& nested_column =
                 vectorized::check_and_get_column<vectorized::ColumnNullable>(needles_data)
                         ->get_nested_column();
         const ColumnString* needles_data_string = check_and_get_column<ColumnString>(nested_column);
 
-        std::vector<StringRef> needles;
-        for (size_t i = 0; i < haystack_offsets.size(); ++i) {
-            needles.reserve(needles_offsets[i] - prev_needles_offset);
-
-            for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j) {
-                needles.emplace_back(needles_data_string->get_data_at(j));
+        std::vector<StringRef> needles_for_row;
+        // haystack first, row by row.
+        for (size_t haystack_index = 0; haystack_index < haystack_offsets.size();
+             ++haystack_index) {
+            // get haystack for this row.
+            const auto* haystack = &haystack_data[prev_haystack_offset];
+            const auto* haystack_end =
+                    haystack - prev_haystack_offset + haystack_offsets[haystack_index];
+
+            // build needles for this row.
+            needles_for_row.reserve(needles_offsets[haystack_index] - prev_needles_offset);
+            for (size_t j = prev_needles_offset; j < needles_offsets[haystack_index]; ++j) {
+                needles_for_row.emplace_back(needles_data_string->get_data_at(j));
             }
-
-            const size_t needles_size = needles.size();
-            if (needles_size > std::numeric_limits<UInt8>::max())
+            const size_t needles_row_size = needles_for_row.size();
+            if (needles_row_size > std::numeric_limits<UInt8>::max()) {
                 return Status::InvalidArgument(
                         "number of arguments for function {} doesn't match: "
                         "passed {}, should be at most 255",
-                        name, needles_size);
-
-            vec_res.resize(vec_res.size() + needles_size);
-
-            auto searcher = Impl::create_multi_searcher(needles);
-
-            std::fill(vec_res.begin() + vec_res.size() - needles_size, vec_res.end(), 0);
+                        name, needles_row_size);
+            }
 
-            while (searcher.hasMoreToSearch()) {
-                const auto* haystack = &haystack_data[prev_haystack_offset];
-                const auto* haystack_end = haystack + haystack_offsets[i] - prev_haystack_offset;
-                searcher.searchOneAll(haystack, haystack_end,
-                                      &vec_res[vec_res.size() - needles_size], res_callback);
+            // each searcher search for one needle.
+            std::vector<SingleSearcher> searchers;
+            searchers.clear();
+            searchers.reserve(needles_row_size);
+            for (auto needle : needles_for_row) {
+                searchers.emplace_back(needle.data, needle.size);
             }
 
-            if (offsets_res.empty())
-                offsets_res.push_back(needles_size);
-            else
-                offsets_res.push_back(offsets_res.back() + needles_size);
+            // Allocate this row's answer slots up front: each row gets exactly one slot per needle.
+            auto ans_row_begin = vec_res.size();
+            vec_res.resize(vec_res.size() + needles_row_size);
+            offset_now += searchers.size();
+            offsets_res.emplace_back(offset_now);
+
+            // For the current haystack, search for each needle and write the answers in needle order.
+            for (size_t ans_slot_in_row = 0; ans_slot_in_row < searchers.size();
+                 ans_slot_in_row++) {
+                // ans_slot_in_row is the index of the answer slot within one result row (array).
+                auto& searcher = searchers[ans_slot_in_row];
+
+                auto ans_now = searcher.search(haystack, haystack_end);
+                vec_res[ans_row_begin + ans_slot_in_row] =
+                        ans_now >= haystack_end ? 0 : std::distance(haystack, ans_now) + 1;
+            }
 
-            prev_haystack_offset = haystack_offsets[i];
-            prev_needles_offset = needles_offsets[i];
-            needles.clear();
+            prev_haystack_offset = haystack_offsets[haystack_index];
+            prev_needles_offset = needles_offsets[haystack_index];
+            needles_for_row.clear();
         }
 
         return Status::OK();
     }
 };
 
-struct MultiSearcherImpl {
-    using MultiSearcher = MultiStringSearcher;
-
-    static MultiSearcher create_multi_searcher(const std::vector<StringRef>& needles) {
-        return MultiSearcher(needles);
-    }
-
-    static size_t count_chars(const char* begin, const char* end) { return end - begin; }
-};
-
 using FunctionMultiSearchAllPositions =
-        FunctionMultiStringPosition<FunctionMultiSearchAllPositionsImpl<MultiSearcherImpl>>;
+        FunctionMultiStringPosition<FunctionMultiSearchAllPositionsImpl>;
 
 void register_function_multi_string_position(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionMultiSearchAllPositions>();
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md b/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
index 9f5c07fdcc..db52923b6a 100644
--- a/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
@@ -30,8 +30,7 @@ under the License.
 
 `ARRAY<INT> multi_search_all_positions(VARCHAR haystack, ARRAY<VARCHAR> needles)`
 
-
-Searches for the substrings `needles` in the string `haystack`, and returns array of positions of the found corresponding substrings in the string. Positions are indexed starting from 1.
+Returns an `ARRAY` whose `i`-th element is the position of the **first** occurrence in the string `haystack` of the `i`-th element of `needles`. Positions are counted from 1; 0 means the element was not found. The search is **case-sensitive**.
 
 ### example
 
@@ -43,12 +42,13 @@ mysql> select multi_search_all_positions('Hello, World!', ['hello', '!', 'world'
 | [0,13,0]                                                             |
 +----------------------------------------------------------------------+
 
-mysql> select multi_search_all_positions('abc', ['a', 'bc', 'd']);
-+-----------------------------------------------------+
-| multi_search_all_positions('abc', ['a', 'bc', 'd']) |
-+-----------------------------------------------------+
-| [1,2,0]                                             |
-+-----------------------------------------------------+
+mysql> select multi_search_all_positions("Hello, World!", ['hello', '!', 'world', 'Hello', 'World']);
++---------------------------------------------------------------------------------------------+
+| multi_search_all_positions('Hello, World!', ARRAY('hello', '!', 'world', 'Hello', 'World')) |
++---------------------------------------------------------------------------------------------+
+| [0, 13, 0, 1, 8]                                                                            |
++---------------------------------------------------------------------------------------------+
 ```
+
 ### keywords
     MULTI_SEARCH,SEARCH,POSITIONS
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
index f93466587d..27f60f1b59 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
@@ -30,8 +30,7 @@ under the License.
 
 `ARRAY<INT> multi_search_all_positions(VARCHAR haystack, ARRAY<VARCHAR> needles)`
 
-
-搜索字符串 `haystack` 中的子字符串 `needles`,并返回在 `haystack` 中找到的相应 `needles` 的位置数组。位置的下标从 1 开始。
+返回一个 `ARRAY`,其中第 `i` 个元素为 `needles` 中第 `i` 个元素 `needle`,在字符串 `haystack` 中**首次**出现的位置。位置从1开始计数,0代表未找到该元素。**大小写敏感**。
 
 ### example
 
@@ -43,12 +42,13 @@ mysql> select multi_search_all_positions('Hello, World!', ['hello', '!', 'world'
 | [0,13,0]                                                             |
 +----------------------------------------------------------------------+
 
-mysql> select multi_search_all_positions('abc', ['a', 'bc', 'd']);
-+-----------------------------------------------------+
-| multi_search_all_positions('abc', ['a', 'bc', 'd']) |
-+-----------------------------------------------------+
-| [1,2,0]                                             |
-+-----------------------------------------------------+
+mysql> select multi_search_all_positions("Hello, World!", ['hello', '!', 'world', 'Hello', 'World']);
++---------------------------------------------------------------------------------------------+
+| multi_search_all_positions('Hello, World!', ARRAY('hello', '!', 'world', 'Hello', 'World')) |
++---------------------------------------------------------------------------------------------+
+| [0, 13, 0, 1, 8]                                                                            |
++---------------------------------------------------------------------------------------------+
 ```
+
 ### keywords
     MULTI_SEARCH,SEARCH,POSITIONS
diff --git a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
index 165a97dea6..017fa620c4 100644
--- a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
+++ b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
@@ -1,25 +1,30 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
--- !select --
+-- !table_select1 --
+[0, 13, 0, 1, 8]
+[0, 13, 0, 1, 8]
+[1, 13, 8, 0, 0]
+[1, 13, 8, 0, 0]
+[0, 6, 0, 0, 0]
+
+-- !table_select2 --
+[0, 0]
+[0, 0, 1, 13]
+[0, 8]
+[1, 8, 0, 13]
+[1, 1, 4, 0]
+
+-- !select1 --
 [4, 1, 1, 2, 6, 1, 1, 0, 4, 1, 14, 0, 10, 0, 16, 6]
--- !select --
+
+-- !select2 --
 [0, 0, 0, 2, 3, 0, 1, 0, 5, 0, 0, 0, 11, 10, 6, 7]
--- !select --
+
+-- !select3 --
 [1, 1, 0, 0, 0, 1, 1, 1, 4, 0, 6, 6, 0, 10, 1, 5]
--- !select --
+
+-- !select4 --
 [1, 0, 0, 8, 6, 0, 7, 1, 3, 0, 0, 0, 0, 12]
--- !select --
+
+-- !select5 --
 [7, 0, 0, 8, 0, 2, 0, 0, 6, 0, 2, 0, 3, 1]
--- !select --
-[0, 0, 0, 19, 14, 22, 10, 0, 0, 13, 0, 8]
--- !select --
-[6, 1, 1, 0, 0, 5, 1, 0, 8, 0, 5, 0, 2, 12, 0, 15, 0, 0]
--- !select --
-[0, 0, 5, 1, 1, 0, 15, 1, 5, 10, 4, 0, 1, 0, 3, 0, 0, 0]
--- !select --
-[0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1]
--- !select --
-[0, 0, 0, 3, 0, 15, 0, 0, 12, 7, 0, 0, 0, 0, 5, 0]
--- !select --
-[0, 0, 12, 4, 4, 0, 13, 23, 0, 1, 0, 2, 0, 0, 0, 3, 0, 0]
--- !select --
-[6, 8, 1, 4, 0, 10, 0, 1, 14, 0, 1, 0, 5, 0, 0, 0, 0, 15, 0, 1]
\ No newline at end of file
+
diff --git a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
index 28cc08efb8..fa3ec92b66 100644
--- a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
+++ b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
@@ -16,16 +16,39 @@
 // under the License.
 
 suite("test_multi_string_position") {
-    qt_select "select multi_search_all_positions('jmdqwjbrxlbatqeixknricfk', ['qwjbrxlba', 'jmd', '', 'mdqwjbrxlbatqe', 'jbrxlbatqeixknric', 'jmdqwjbrxlbatqeixknri', '', 'fdtmnwtts', 'qwjbrxlba', '', 'qeixknricfk', 'hzjjgrnoilfkvzxaemzhf', 'lb', 'kamz', 'ixknr', 'jbrxlbatq'])"
-    qt_select "select multi_search_all_positions('coxcctuehmzkbrsmodfvx', ['bkhnp', 'nlypjvriuk', 'rkslxwfqjjivcwdexrdtvjdtvuu', 'oxcctuehm', 'xcctuehmzkbrsm', 'kfrieuocovykjmkwxbdlkgwctwvcuh', 'coxc', 'lbwvetgxyndxjqqwthtkgasbafii', 'ctuehmzkbrsmodfvx', 'obzldxjldxowk', 'ngfikgigeyll', 'wdaejjukowgvzijnw', 'zkbr', 'mzkb', 'tuehm', 'ue'])"
-    qt_select "select multi_search_all_positions('mpswgtljbbrmivkcglamemayfn', ['', 'm', 'saejhpnfgfq', 'rzanrkdssmmkanqjpfi', 'oputeneprgoowg', 'mp', '', '', 'wgtljbbrmivkcglamemay', 'cbpthtrgrmgfypizi', 'tl', 'tlj', 'xuhs', 'brmivkcglamemayfn', '', 'gtljb'])"
-    qt_select "select multi_search_all_positions('arbphzbbecypbzsqsljurtddve', ['arbphzb', 'mnrboimjfijnti', 'cikcrd', 'becypbz', 'z', 'uocmqgnczhdcrvtqrnaxdxjjlhakoszuwc', 'bbe', '', 'bp', 'yhltnexlpdijkdzt', 'jkwjmrckvgmccmmrolqvy', 'vdxmicjmfbtsbqqmqcgtnrvdgaucsgspwg', 'witlfqwvhmmyjrnrzttrikhhsrd', 'pbzsqsljurt'])"
-    qt_select "select multi_search_all_positions('aizovxqpzcbbxuhwtiaaqhdqjdei', ['qpzcbbxuhw', 'jugrpglqbm', 'dspwhzpyjohhtizegrnswhjfpdz', 'pzcbbxuh', 'vayzeszlycke', 'i', 'gvrontcpqavsjxtjwzgwxugiyhkhmhq', 'gyzmeroxztgaurmrqwtmsxcqnxaezuoapatvu', 'xqpzc', 'mjiswsvlvlpqrhhptqq', 'iz', 'hmzjxxfjsvcvdpqwtrdrp', 'zovxqpzcbbxuhwtia', 'ai'])"
-    qt_select "select multi_search_all_positions('ydfgiluhyxwqdfiwtzobwzscyxhuov', ['srsoubrgghleyheujsbwwwykerzlqphgejpxvog', 'axchkyleddjwkvbuyhmekpbbbztxdlm', 'zqodzvlkmfe', 'obwz', 'fi', 'zsc', 'xwq', 'pvmurvrd', 'uulcdtexckmrsokmgdpkstlkoavyrmxeaacvydxf', 'dfi', 'mxcngttujzgtlssrmluaflmjuv', 'hyxwqdfiwtzobwzscyxhu'])"
-    qt_select "select multi_search_all_positions('pyepgwainvmwekwhhqxxvzdjw', ['w', '', '', 'gvvkllofjnxvcu', 'kmwwhboplctvzazcyfpxhwtaddfnhekei', 'gwainv', 'pyepgwain', 'ekpnogkzzmbpfynsunwqp', 'invmwe', 'hrxpiplfplqjsstuybksuteoz', 'gwa', 'akfpyduqrwosxcbdemtxrxvundrgse', 'yepgwainvmw', 'wekwhhqxxvzdjw', 'fyimzvedmyriubgoznmcav', 'whhq', 'ozxowbwdqfisuupyzaqynoprgsjhkwlum', 'vpoufrofekajksdp'])"
-    qt_select "select multi_search_all_positions('lqwahffxurkbhhzytequotkfk', ['rwjqudpuaiufle', 'livwgbnflvy', 'hffxurkbhh', '', '', 'xcajwbqbttzfzfowjubmmgnmssat', 'zytequ', 'lq', 'h', 'rkbhh', 'a', 'immejthwgdr', '', 'llhhnlhcvnxxorzzjt', 'w', 'cvjynqxcivmmmvc', 'wexjomdcmursppjtsweybheyxzleuz', 'fzronsnddfxwlkkzidiknhpjipyrcrzel'])"
-    qt_select "select multi_search_all_positions('nkddriylnakicwgdwrfxpodqea', ['izwdpgrgpmjlwkanjrffgela', '', 'kicw', 'hltmfymgmrjckdiylkzjlvvyuleksikdjrg', 'yigveskrbidknjxigwilmkgyizewikh', 'xyvzhsnqmuec', 'odcgzlavzrwesjks', 'oilvfgliktoujukpgzvhmokdgkssqgqot', 'llsfsurvimbahwqtbqbp', 'nxj', 'pimydixeobdxmdkvhcyzcgnbhzsydx', 'couzmvxedobuohibgxwoxvmpote', 'driylnakicwgdwrf', 'nkddr'])"
-    qt_select "select multi_search_all_positions('jnckhtjqwycyihuejibqmddrdxe', ['tajzx', 'vuddoylclxatcjvinusdwt', 'spxkhxvzsljkmnzpeubszjnhqczavgtqopxn', 'ckhtjqwycyi', 'xlbfzdxspldoes', 'u', 'czosfebeznt', 'gzhabdsuyreisxvyfrfrkq', 'yihuejibqmd', 'jqwycyihuejibqm', 'cfbvprgzx', 'hxu', 'vxbhrfpzacgd', 'afoaij', 'htjqwycyihu', 'httzbskqd'])"
-    qt_select "select multi_search_all_positions('dzejajvpoojdkqbnayahygidyrjmb', ['khwxxvtnqhobbvwgwkpusjlhlzifiuclycml', 'nzvuhtwdaivo', 'dkqbnayahygidyr', 'jajvpoo', 'j', 'wdtbvwmeqgyvetu', 'kqbn', 'idyrjmb', 'tsnxuxevsxrxpgpfdgrkhwqpkse', '', 'efsdgzuefhdzkmquxu', 'zejajvpoojdkqbnayahyg', 'ugwfuighbygrxyctop', 'fcbxzbdugc', 'dxmzzrcplob', 'ejaj', 'wmmupyxrylvawsyfccluiiene', 'ohzmsqhpzbafvbzqwzftbvftei'])"
-    qt_select "select multi_search_all_positions('ffaujlverosspbzaqefjzql', ['lvero', 'erossp', 'f', 'ujlverosspbz', 'btfimgklzzxlbkbuqyrmnud', 'osspb', 'muqexvtjuaar', 'f', 'bzaq', 'lprihswhwkdhqciqhfaowarn', 'ffaujlve', 'uhbbjrqjb', 'jlver', 'umucyhbbu', 'pjthtzmgxhvpbdphesnnztuu', 'xfqhfdfsbbazactpastzvzqudgk', 'lvovjfoatc', 'z', 'givejzhoqsd', ''])"
+    def table_name = "strings"
+
+    sql """ DROP TABLE IF EXISTS ${table_name} """
+    sql """ CREATE TABLE IF NOT EXISTS ${table_name}
+            (
+                `col1`      INT NOT NULL,
+                `content`   TEXT NOT NULL,
+                `mode`      ARRAY<TEXT> NOT NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`col1`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`col1`) BUCKETS 3
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "in_memory" = "false",
+            "storage_format" = "V2"
+            );
+        """
+
+    sql """ INSERT INTO ${table_name} (col1, content, mode) VALUES
+            (1, 'Hello, World!', ['hello', 'world'] ),
+            (2, 'Hello, World!', ['hello', 'world', 'Hello', '!'] ),
+            (3, 'hello, world!', ['Hello', 'world'] ),
+            (4, 'hello, world!', ['hello', 'world', 'Hello', '!'] ),
+            (5, 'HHHHW!', ['H', 'HHHH', 'HW', 'WH'] );
+        """
+
+    qt_table_select1 "select multi_search_all_positions(content, ['hello', '!', 'world', 'Hello', 'World']) from ${table_name} order by col1"
+    qt_table_select2 "select multi_search_all_positions(content, mode) from ${table_name} order by col1"
+
+    qt_select1 "select multi_search_all_positions('jmdqwjbrxlbatqeixknricfk', ['qwjbrxlba', 'jmd', '', 'mdqwjbrxlbatqe', 'jbrxlbatqeixknric', 'jmdqwjbrxlbatqeixknri', '', 'fdtmnwtts', 'qwjbrxlba', '', 'qeixknricfk', 'hzjjgrnoilfkvzxaemzhf', 'lb', 'kamz', 'ixknr', 'jbrxlbatq'])"
+    qt_select2 "select multi_search_all_positions('coxcctuehmzkbrsmodfvx', ['bkhnp', 'nlypjvriuk', 'rkslxwfqjjivcwdexrdtvjdtvuu', 'oxcctuehm', 'xcctuehmzkbrsm', 'kfrieuocovykjmkwxbdlkgwctwvcuh', 'coxc', 'lbwvetgxyndxjqqwthtkgasbafii', 'ctuehmzkbrsmodfvx', 'obzldxjldxowk', 'ngfikgigeyll', 'wdaejjukowgvzijnw', 'zkbr', 'mzkb', 'tuehm', 'ue'])"
+    qt_select3 "select multi_search_all_positions('mpswgtljbbrmivkcglamemayfn', ['', 'm', 'saejhpnfgfq', 'rzanrkdssmmkanqjpfi', 'oputeneprgoowg', 'mp', '', '', 'wgtljbbrmivkcglamemay', 'cbpthtrgrmgfypizi', 'tl', 'tlj', 'xuhs', 'brmivkcglamemayfn', '', 'gtljb'])"
+    qt_select4 "select multi_search_all_positions('arbphzbbecypbzsqsljurtddve', ['arbphzb', 'mnrboimjfijnti', 'cikcrd', 'becypbz', 'z', 'uocmqgnczhdcrvtqrnaxdxjjlhakoszuwc', 'bbe', '', 'bp', 'yhltnexlpdijkdzt', 'jkwjmrckvgmccmmrolqvy', 'vdxmicjmfbtsbqqmqcgtnrvdgaucsgspwg', 'witlfqwvhmmyjrnrzttrikhhsrd', 'pbzsqsljurt'])"
+    qt_select5 "select multi_search_all_positions('aizovxqpzcbbxuhwtiaaqhdqjdei', ['qpzcbbxuhw', 'jugrpglqbm', 'dspwhzpyjohhtizegrnswhjfpdz', 'pzcbbxuh', 'vayzeszlycke', 'i', 'gvrontcpqavsjxtjwzgwxugiyhkhmhq', 'gyzmeroxztgaurmrqwtmsxcqnxaezuoapatvu', 'xqpzc', 'mjiswsvlvlpqrhhptqq', 'iz', 'hmzjxxfjsvcvdpqwtrdrp', 'zovxqpzcbbxuhwtia', 'ai'])"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org