You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/04/19 13:34:35 UTC
[doris] 09/36: [BugFix](functions) fix multi_search_all_positions #18682
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0-alpha
in repository https://gitbox.apache.org/repos/asf/doris.git
commit b883b2019a4c8dda2f58149a991bea66522bd19e
Author: zclllyybb <zh...@selectdb.com>
AuthorDate: Mon Apr 17 08:32:57 2023 +0800
[BugFix](functions) fix multi_search_all_positions #18682
---
be/src/vec/common/string_searcher.h | 66 ----------
.../functions/functions_multi_string_position.cpp | 140 +++++++++++----------
.../search/multi_search_all_positions.md | 16 +--
.../search/multi_search_all_positions.md | 16 +--
.../test_multi_string_position.out | 43 ++++---
.../test_multi_string_position.groovy | 47 +++++--
6 files changed, 149 insertions(+), 179 deletions(-)
diff --git a/be/src/vec/common/string_searcher.h b/be/src/vec/common/string_searcher.h
index 97c5570a81..af76f2100d 100644
--- a/be/src/vec/common/string_searcher.h
+++ b/be/src/vec/common/string_searcher.h
@@ -416,70 +416,4 @@ struct LibCASCIICaseInsensitiveStringSearcher : public StringSearcherBase {
return search(haystack, haystack + haystack_size);
}
};
-
-template <typename StringSearcher>
-class MultiStringSearcherBase {
-private:
- /// needles
- const std::vector<StringRef>& needles;
- /// searchers
- std::vector<StringSearcher> searchers;
- /// last index of needles that was not processed
- size_t last;
-
-public:
- explicit MultiStringSearcherBase(const std::vector<StringRef>& needles_)
- : needles {needles_}, last {0} {
- searchers.reserve(needles.size());
-
- size_t size = needles.size();
- for (int i = 0; i < size; ++i) {
- const char* cur_needle_data = needles[i].data;
- const size_t cur_needle_size = needles[i].size;
-
- searchers.emplace_back(cur_needle_data, cur_needle_size);
- }
- }
-
- /**
- * while (hasMoreToSearch())
- * {
- * search inside the haystack with the known needles
- * }
- */
- bool hasMoreToSearch() {
- if (last >= needles.size()) {
- return false;
- }
-
- return true;
- }
-
- bool searchOne(const uint8_t* haystack, const uint8_t* haystack_end) {
- const size_t size = needles.size();
- if (last >= size) {
- return false;
- }
-
- if (searchers[++last].search(haystack, haystack_end) != haystack_end) {
- return true;
- }
- return false;
- }
-
- template <typename CountCharsCallback, typename AnsType>
- void searchOneAll(const uint8_t* haystack, const uint8_t* haystack_end, AnsType* answer,
- const CountCharsCallback& count_chars) {
- const size_t size = needles.size();
- for (; last < size; ++last) {
- const uint8_t* ptr = searchers[last].search(haystack, haystack_end);
- if (ptr != haystack_end) {
- answer[last] = count_chars(haystack, ptr);
- }
- }
- }
-};
-
-using MultiStringSearcher = MultiStringSearcherBase<ASCIICaseSensitiveStringSearcher>;
-
} // namespace doris
diff --git a/be/src/vec/functions/functions_multi_string_position.cpp b/be/src/vec/functions/functions_multi_string_position.cpp
index a3c3420acb..5b8dd13b9b 100644
--- a/be/src/vec/functions/functions_multi_string_position.cpp
+++ b/be/src/vec/functions/functions_multi_string_position.cpp
@@ -18,6 +18,9 @@
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsMultiStringPosition.h
// and modified by Doris
+#include <cstdint>
+#include <iterator>
+
#include "function.h"
#include "function_helpers.h"
#include "vec/columns/column_array.h"
@@ -103,48 +106,53 @@ public:
}
};
-template <typename Impl>
struct FunctionMultiSearchAllPositionsImpl {
+public:
using ResultType = Int32;
-
+ using SingleSearcher = ASCIICaseSensitiveStringSearcher;
static constexpr auto name = "multi_search_all_positions";
static Status vector_constant(const ColumnString::Chars& haystack_data,
const ColumnString::Offsets& haystack_offsets,
const Array& needles_arr, PaddedPODArray<Int32>& vec_res,
PaddedPODArray<UInt64>& offsets_res) {
- if (needles_arr.size() > std::numeric_limits<UInt8>::max())
+ if (needles_arr.size() > std::numeric_limits<UInt8>::max()) {
return Status::InvalidArgument(
"number of arguments for function {} doesn't match: "
"passed {}, should be at most 255",
name, needles_arr.size());
+ }
- std::vector<StringRef> needles;
- needles.reserve(needles_arr.size());
- for (const auto& needle : needles_arr) needles.emplace_back(needle.get<StringRef>());
-
- auto res_callback = [](const UInt8* start, const UInt8* end) -> Int32 {
- return 1 + Impl::count_chars(reinterpret_cast<const char*>(start),
- reinterpret_cast<const char*>(end));
- };
-
- auto searcher = Impl::create_multi_searcher(needles);
+ const size_t needles_size = needles_arr.size();
+ std::vector<SingleSearcher> searchers;
+ searchers.reserve(needles_size);
+ for (const auto& needle : needles_arr) {
+ searchers.emplace_back(needle.get<StringRef>().data, needle.get<StringRef>().size);
+ }
const size_t haystack_size = haystack_offsets.size();
- const size_t needles_size = needles.size();
-
- vec_res.resize(haystack_size * needles.size());
+ vec_res.resize(haystack_size * needles_size);
offsets_res.resize(haystack_size);
std::fill(vec_res.begin(), vec_res.end(), 0);
- while (searcher.hasMoreToSearch()) {
+ // We fill the result one answer slot (i.e. one needle) at a time across all rows, rather than row by row.
+ // TODO: check whether this loop order is the best one. With large data, the strided writes may span more memory than fits in the L2 cache.
+ for (size_t ans_slot_in_row = 0; ans_slot_in_row < searchers.size(); ans_slot_in_row++) {
+ // i.e. the index of the answer slot within one row (Vector) of the answer
+ auto& searcher = searchers[ans_slot_in_row];
size_t prev_haystack_offset = 0;
- for (size_t j = 0, from = 0; j < haystack_size; ++j, from += needles_size) {
+
+ for (size_t haystack_index = 0, res_index = ans_slot_in_row;
+ haystack_index < haystack_size; ++haystack_index, res_index += needles_size) {
const auto* haystack = &haystack_data[prev_haystack_offset];
- const auto* haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset;
- searcher.searchOneAll(haystack, haystack_end, &vec_res[from], res_callback);
- prev_haystack_offset = haystack_offsets[j];
+ const auto* haystack_end =
+ haystack - prev_haystack_offset + haystack_offsets[haystack_index];
+
+ auto ans_now = searcher.search(haystack, haystack_end);
+ vec_res[res_index] =
+ ans_now >= haystack_end ? 0 : std::distance(haystack, ans_now) + 1;
+ prev_haystack_offset = haystack_offsets[haystack_index];
}
}
@@ -166,72 +174,72 @@ struct FunctionMultiSearchAllPositionsImpl {
size_t prev_haystack_offset = 0;
size_t prev_needles_offset = 0;
- auto res_callback = [](const UInt8* start, const UInt8* end) -> Int32 {
- return 1 + Impl::count_chars(reinterpret_cast<const char*>(start),
- reinterpret_cast<const char*>(end));
- };
-
- offsets_res.reserve(haystack_offsets.size());
+ offsets_res.reserve(haystack_data.size());
+ uint64_t offset_now = 0;
auto& nested_column =
vectorized::check_and_get_column<vectorized::ColumnNullable>(needles_data)
->get_nested_column();
const ColumnString* needles_data_string = check_and_get_column<ColumnString>(nested_column);
- std::vector<StringRef> needles;
- for (size_t i = 0; i < haystack_offsets.size(); ++i) {
- needles.reserve(needles_offsets[i] - prev_needles_offset);
-
- for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j) {
- needles.emplace_back(needles_data_string->get_data_at(j));
+ std::vector<StringRef> needles_for_row;
+ // haystack first, row by row.
+ for (size_t haystack_index = 0; haystack_index < haystack_offsets.size();
+ ++haystack_index) {
+ // get haystack for this row.
+ const auto* haystack = &haystack_data[prev_haystack_offset];
+ const auto* haystack_end =
+ haystack - prev_haystack_offset + haystack_offsets[haystack_index];
+
+ // build needles for this row.
+ needles_for_row.reserve(needles_offsets[haystack_index] - prev_needles_offset);
+ for (size_t j = prev_needles_offset; j < needles_offsets[haystack_index]; ++j) {
+ needles_for_row.emplace_back(needles_data_string->get_data_at(j));
}
-
- const size_t needles_size = needles.size();
- if (needles_size > std::numeric_limits<UInt8>::max())
+ const size_t needles_row_size = needles_for_row.size();
+ if (needles_row_size > std::numeric_limits<UInt8>::max()) {
return Status::InvalidArgument(
"number of arguments for function {} doesn't match: "
"passed {}, should be at most 255",
- name, needles_size);
-
- vec_res.resize(vec_res.size() + needles_size);
-
- auto searcher = Impl::create_multi_searcher(needles);
-
- std::fill(vec_res.begin() + vec_res.size() - needles_size, vec_res.end(), 0);
+ name, needles_row_size);
+ }
- while (searcher.hasMoreToSearch()) {
- const auto* haystack = &haystack_data[prev_haystack_offset];
- const auto* haystack_end = haystack + haystack_offsets[i] - prev_haystack_offset;
- searcher.searchOneAll(haystack, haystack_end,
- &vec_res[vec_res.size() - needles_size], res_callback);
+ // each searcher search for one needle.
+ std::vector<SingleSearcher> searchers;
+ searchers.clear();
+ searchers.reserve(needles_row_size);
+ for (auto needle : needles_for_row) {
+ searchers.emplace_back(needle.data, needle.size);
}
- if (offsets_res.empty())
- offsets_res.push_back(needles_size);
- else
- offsets_res.push_back(offsets_res.back() + needles_size);
+ // Allocate this row's answer slots first, so that each row's answer has exactly one slot per needle.
+ auto ans_row_begin = vec_res.size();
+ vec_res.resize(vec_res.size() + needles_row_size);
+ offset_now += searchers.size();
+ offsets_res.emplace_back(offset_now);
+
+ // For the current haystack, search with each needle and write the answers in needle order.
+ for (size_t ans_slot_in_row = 0; ans_slot_in_row < searchers.size();
+ ans_slot_in_row++) {
+ // i.e. the index of the answer slot within one row (Vector) of the answer
+ auto& searcher = searchers[ans_slot_in_row];
+
+ auto ans_now = searcher.search(haystack, haystack_end);
+ vec_res[ans_row_begin + ans_slot_in_row] =
+ ans_now >= haystack_end ? 0 : std::distance(haystack, ans_now) + 1;
+ }
- prev_haystack_offset = haystack_offsets[i];
- prev_needles_offset = needles_offsets[i];
- needles.clear();
+ prev_haystack_offset = haystack_offsets[haystack_index];
+ prev_needles_offset = needles_offsets[haystack_index];
+ needles_for_row.clear();
}
return Status::OK();
}
};
-struct MultiSearcherImpl {
- using MultiSearcher = MultiStringSearcher;
-
- static MultiSearcher create_multi_searcher(const std::vector<StringRef>& needles) {
- return MultiSearcher(needles);
- }
-
- static size_t count_chars(const char* begin, const char* end) { return end - begin; }
-};
-
using FunctionMultiSearchAllPositions =
- FunctionMultiStringPosition<FunctionMultiSearchAllPositionsImpl<MultiSearcherImpl>>;
+ FunctionMultiStringPosition<FunctionMultiSearchAllPositionsImpl>;
void register_function_multi_string_position(SimpleFunctionFactory& factory) {
factory.register_function<FunctionMultiSearchAllPositions>();
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md b/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
index 9f5c07fdcc..db52923b6a 100644
--- a/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
@@ -30,8 +30,7 @@ under the License.
`ARRAY<INT> multi_search_all_positions(VARCHAR haystack, ARRAY<VARCHAR> needles)`
-
-Searches for the substrings `needles` in the string `haystack`, and returns array of positions of the found corresponding substrings in the string. Positions are indexed starting from 1.
+Returns an `ARRAY` whose `i`-th element is the position of the **first** occurrence of the `i`-th element of `needles` (i.e. a `needle`) in the string `haystack`. Positions are counted from 1, with 0 meaning the element was not found. **Case-sensitive**.
### example
@@ -43,12 +42,13 @@ mysql> select multi_search_all_positions('Hello, World!', ['hello', '!', 'world'
| [0,13,0] |
+----------------------------------------------------------------------+
-mysql> select multi_search_all_positions('abc', ['a', 'bc', 'd']);
-+-----------------------------------------------------+
-| multi_search_all_positions('abc', ['a', 'bc', 'd']) |
-+-----------------------------------------------------+
-| [1,2,0] |
-+-----------------------------------------------------+
+select multi_search_all_positions("Hello, World!", ['hello', '!', 'world', 'Hello', 'World']);
++---------------------------------------------------------------------------------------------+
+| multi_search_all_positions('Hello, World!', ARRAY('hello', '!', 'world', 'Hello', 'World')) |
++---------------------------------------------------------------------------------------------+
+| [0, 13, 0, 1, 8] |
++---------------------------------------------------------------------------------------------+
```
+
### keywords
MULTI_SEARCH,SEARCH,POSITIONS
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
index f93466587d..27f60f1b59 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/search/multi_search_all_positions.md
@@ -30,8 +30,7 @@ under the License.
`ARRAY<INT> multi_search_all_positions(VARCHAR haystack, ARRAY<VARCHAR> needles)`
-
-搜索字符串 `haystack` 中的子字符串 `needles`,并返回在 `haystack` 中找到的相应 `needles` 的位置数组。位置的下标从 1 开始。
+返回一个 `ARRAY`,其中第 `i` 个元素为 `needles` 中第 `i` 个元素 `needle`,在字符串 `haystack` 中**首次**出现的位置。位置从1开始计数,0代表未找到该元素。**大小写敏感**。
### example
@@ -43,12 +42,13 @@ mysql> select multi_search_all_positions('Hello, World!', ['hello', '!', 'world'
| [0,13,0] |
+----------------------------------------------------------------------+
-mysql> select multi_search_all_positions('abc', ['a', 'bc', 'd']);
-+-----------------------------------------------------+
-| multi_search_all_positions('abc', ['a', 'bc', 'd']) |
-+-----------------------------------------------------+
-| [1,2,0] |
-+-----------------------------------------------------+
+select multi_search_all_positions("Hello, World!", ['hello', '!', 'world', 'Hello', 'World']);
++---------------------------------------------------------------------------------------------+
+| multi_search_all_positions('Hello, World!', ARRAY('hello', '!', 'world', 'Hello', 'World')) |
++---------------------------------------------------------------------------------------------+
+| [0, 13, 0, 1, 8] |
++---------------------------------------------------------------------------------------------+
```
+
### keywords
MULTI_SEARCH,SEARCH,POSITIONS
diff --git a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
index 165a97dea6..017fa620c4 100644
--- a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
+++ b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
@@ -1,25 +1,30 @@
-- This file is automatically generated. You should know what you did if you want to edit this
--- !select --
+-- !table_select1 --
+[0, 13, 0, 1, 8]
+[0, 13, 0, 1, 8]
+[1, 13, 8, 0, 0]
+[1, 13, 8, 0, 0]
+[0, 6, 0, 0, 0]
+
+-- !table_select2 --
+[0, 0]
+[0, 0, 1, 13]
+[0, 8]
+[1, 8, 0, 13]
+[1, 1, 4, 0]
+
+-- !select1 --
[4, 1, 1, 2, 6, 1, 1, 0, 4, 1, 14, 0, 10, 0, 16, 6]
--- !select --
+
+-- !select2 --
[0, 0, 0, 2, 3, 0, 1, 0, 5, 0, 0, 0, 11, 10, 6, 7]
--- !select --
+
+-- !select3 --
[1, 1, 0, 0, 0, 1, 1, 1, 4, 0, 6, 6, 0, 10, 1, 5]
--- !select --
+
+-- !select4 --
[1, 0, 0, 8, 6, 0, 7, 1, 3, 0, 0, 0, 0, 12]
--- !select --
+
+-- !select5 --
[7, 0, 0, 8, 0, 2, 0, 0, 6, 0, 2, 0, 3, 1]
--- !select --
-[0, 0, 0, 19, 14, 22, 10, 0, 0, 13, 0, 8]
--- !select --
-[6, 1, 1, 0, 0, 5, 1, 0, 8, 0, 5, 0, 2, 12, 0, 15, 0, 0]
--- !select --
-[0, 0, 5, 1, 1, 0, 15, 1, 5, 10, 4, 0, 1, 0, 3, 0, 0, 0]
--- !select --
-[0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1]
--- !select --
-[0, 0, 0, 3, 0, 15, 0, 0, 12, 7, 0, 0, 0, 0, 5, 0]
--- !select --
-[0, 0, 12, 4, 4, 0, 13, 23, 0, 1, 0, 2, 0, 0, 0, 3, 0, 0]
--- !select --
-[6, 8, 1, 4, 0, 10, 0, 1, 14, 0, 1, 0, 5, 0, 0, 0, 0, 15, 0, 1]
\ No newline at end of file
+
diff --git a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
index 28cc08efb8..fa3ec92b66 100644
--- a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
+++ b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
@@ -16,16 +16,39 @@
// under the License.
suite("test_multi_string_position") {
- qt_select "select multi_search_all_positions('jmdqwjbrxlbatqeixknricfk', ['qwjbrxlba', 'jmd', '', 'mdqwjbrxlbatqe', 'jbrxlbatqeixknric', 'jmdqwjbrxlbatqeixknri', '', 'fdtmnwtts', 'qwjbrxlba', '', 'qeixknricfk', 'hzjjgrnoilfkvzxaemzhf', 'lb', 'kamz', 'ixknr', 'jbrxlbatq'])"
- qt_select "select multi_search_all_positions('coxcctuehmzkbrsmodfvx', ['bkhnp', 'nlypjvriuk', 'rkslxwfqjjivcwdexrdtvjdtvuu', 'oxcctuehm', 'xcctuehmzkbrsm', 'kfrieuocovykjmkwxbdlkgwctwvcuh', 'coxc', 'lbwvetgxyndxjqqwthtkgasbafii', 'ctuehmzkbrsmodfvx', 'obzldxjldxowk', 'ngfikgigeyll', 'wdaejjukowgvzijnw', 'zkbr', 'mzkb', 'tuehm', 'ue'])"
- qt_select "select multi_search_all_positions('mpswgtljbbrmivkcglamemayfn', ['', 'm', 'saejhpnfgfq', 'rzanrkdssmmkanqjpfi', 'oputeneprgoowg', 'mp', '', '', 'wgtljbbrmivkcglamemay', 'cbpthtrgrmgfypizi', 'tl', 'tlj', 'xuhs', 'brmivkcglamemayfn', '', 'gtljb'])"
- qt_select "select multi_search_all_positions('arbphzbbecypbzsqsljurtddve', ['arbphzb', 'mnrboimjfijnti', 'cikcrd', 'becypbz', 'z', 'uocmqgnczhdcrvtqrnaxdxjjlhakoszuwc', 'bbe', '', 'bp', 'yhltnexlpdijkdzt', 'jkwjmrckvgmccmmrolqvy', 'vdxmicjmfbtsbqqmqcgtnrvdgaucsgspwg', 'witlfqwvhmmyjrnrzttrikhhsrd', 'pbzsqsljurt'])"
- qt_select "select multi_search_all_positions('aizovxqpzcbbxuhwtiaaqhdqjdei', ['qpzcbbxuhw', 'jugrpglqbm', 'dspwhzpyjohhtizegrnswhjfpdz', 'pzcbbxuh', 'vayzeszlycke', 'i', 'gvrontcpqavsjxtjwzgwxugiyhkhmhq', 'gyzmeroxztgaurmrqwtmsxcqnxaezuoapatvu', 'xqpzc', 'mjiswsvlvlpqrhhptqq', 'iz', 'hmzjxxfjsvcvdpqwtrdrp', 'zovxqpzcbbxuhwtia', 'ai'])"
- qt_select "select multi_search_all_positions('ydfgiluhyxwqdfiwtzobwzscyxhuov', ['srsoubrgghleyheujsbwwwykerzlqphgejpxvog', 'axchkyleddjwkvbuyhmekpbbbztxdlm', 'zqodzvlkmfe', 'obwz', 'fi', 'zsc', 'xwq', 'pvmurvrd', 'uulcdtexckmrsokmgdpkstlkoavyrmxeaacvydxf', 'dfi', 'mxcngttujzgtlssrmluaflmjuv', 'hyxwqdfiwtzobwzscyxhu'])"
- qt_select "select multi_search_all_positions('pyepgwainvmwekwhhqxxvzdjw', ['w', '', '', 'gvvkllofjnxvcu', 'kmwwhboplctvzazcyfpxhwtaddfnhekei', 'gwainv', 'pyepgwain', 'ekpnogkzzmbpfynsunwqp', 'invmwe', 'hrxpiplfplqjsstuybksuteoz', 'gwa', 'akfpyduqrwosxcbdemtxrxvundrgse', 'yepgwainvmw', 'wekwhhqxxvzdjw', 'fyimzvedmyriubgoznmcav', 'whhq', 'ozxowbwdqfisuupyzaqynoprgsjhkwlum', 'vpoufrofekajksdp'])"
- qt_select "select multi_search_all_positions('lqwahffxurkbhhzytequotkfk', ['rwjqudpuaiufle', 'livwgbnflvy', 'hffxurkbhh', '', '', 'xcajwbqbttzfzfowjubmmgnmssat', 'zytequ', 'lq', 'h', 'rkbhh', 'a', 'immejthwgdr', '', 'llhhnlhcvnxxorzzjt', 'w', 'cvjynqxcivmmmvc', 'wexjomdcmursppjtsweybheyxzleuz', 'fzronsnddfxwlkkzidiknhpjipyrcrzel'])"
- qt_select "select multi_search_all_positions('nkddriylnakicwgdwrfxpodqea', ['izwdpgrgpmjlwkanjrffgela', '', 'kicw', 'hltmfymgmrjckdiylkzjlvvyuleksikdjrg', 'yigveskrbidknjxigwilmkgyizewikh', 'xyvzhsnqmuec', 'odcgzlavzrwesjks', 'oilvfgliktoujukpgzvhmokdgkssqgqot', 'llsfsurvimbahwqtbqbp', 'nxj', 'pimydixeobdxmdkvhcyzcgnbhzsydx', 'couzmvxedobuohibgxwoxvmpote', 'driylnakicwgdwrf', 'nkddr'])"
- qt_select "select multi_search_all_positions('jnckhtjqwycyihuejibqmddrdxe', ['tajzx', 'vuddoylclxatcjvinusdwt', 'spxkhxvzsljkmnzpeubszjnhqczavgtqopxn', 'ckhtjqwycyi', 'xlbfzdxspldoes', 'u', 'czosfebeznt', 'gzhabdsuyreisxvyfrfrkq', 'yihuejibqmd', 'jqwycyihuejibqm', 'cfbvprgzx', 'hxu', 'vxbhrfpzacgd', 'afoaij', 'htjqwycyihu', 'httzbskqd'])"
- qt_select "select multi_search_all_positions('dzejajvpoojdkqbnayahygidyrjmb', ['khwxxvtnqhobbvwgwkpusjlhlzifiuclycml', 'nzvuhtwdaivo', 'dkqbnayahygidyr', 'jajvpoo', 'j', 'wdtbvwmeqgyvetu', 'kqbn', 'idyrjmb', 'tsnxuxevsxrxpgpfdgrkhwqpkse', '', 'efsdgzuefhdzkmquxu', 'zejajvpoojdkqbnayahyg', 'ugwfuighbygrxyctop', 'fcbxzbdugc', 'dxmzzrcplob', 'ejaj', 'wmmupyxrylvawsyfccluiiene', 'ohzmsqhpzbafvbzqwzftbvftei'])"
- qt_select "select multi_search_all_positions('ffaujlverosspbzaqefjzql', ['lvero', 'erossp', 'f', 'ujlverosspbz', 'btfimgklzzxlbkbuqyrmnud', 'osspb', 'muqexvtjuaar', 'f', 'bzaq', 'lprihswhwkdhqciqhfaowarn', 'ffaujlve', 'uhbbjrqjb', 'jlver', 'umucyhbbu', 'pjthtzmgxhvpbdphesnnztuu', 'xfqhfdfsbbazactpastzvzqudgk', 'lvovjfoatc', 'z', 'givejzhoqsd', ''])"
+ def table_name = "strings"
+
+ sql """ DROP TABLE IF EXISTS ${table_name} """
+ sql """ CREATE TABLE IF NOT EXISTS ${table_name}
+ (
+ `col1` INT NOT NULL,
+ `content` TEXT NOT NULL,
+ `mode` ARRAY<TEXT> NOT NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`col1`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`col1`) BUCKETS 3
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2"
+ );
+ """
+
+ sql """ INSERT INTO ${table_name} (col1, content, mode) VALUES
+ (1, 'Hello, World!', ['hello', 'world'] ),
+ (2, 'Hello, World!', ['hello', 'world', 'Hello', '!'] ),
+ (3, 'hello, world!', ['Hello', 'world'] ),
+ (4, 'hello, world!', ['hello', 'world', 'Hello', '!'] ),
+ (5, 'HHHHW!', ['H', 'HHHH', 'HW', 'WH'] );
+ """
+
+ qt_table_select1 "select multi_search_all_positions(content, ['hello', '!', 'world', 'Hello', 'World']) from ${table_name} order by col1"
+ qt_table_select2 "select multi_search_all_positions(content, mode) from ${table_name} order by col1"
+
+ qt_select1 "select multi_search_all_positions('jmdqwjbrxlbatqeixknricfk', ['qwjbrxlba', 'jmd', '', 'mdqwjbrxlbatqe', 'jbrxlbatqeixknric', 'jmdqwjbrxlbatqeixknri', '', 'fdtmnwtts', 'qwjbrxlba', '', 'qeixknricfk', 'hzjjgrnoilfkvzxaemzhf', 'lb', 'kamz', 'ixknr', 'jbrxlbatq'])"
+ qt_select2 "select multi_search_all_positions('coxcctuehmzkbrsmodfvx', ['bkhnp', 'nlypjvriuk', 'rkslxwfqjjivcwdexrdtvjdtvuu', 'oxcctuehm', 'xcctuehmzkbrsm', 'kfrieuocovykjmkwxbdlkgwctwvcuh', 'coxc', 'lbwvetgxyndxjqqwthtkgasbafii', 'ctuehmzkbrsmodfvx', 'obzldxjldxowk', 'ngfikgigeyll', 'wdaejjukowgvzijnw', 'zkbr', 'mzkb', 'tuehm', 'ue'])"
+ qt_select3 "select multi_search_all_positions('mpswgtljbbrmivkcglamemayfn', ['', 'm', 'saejhpnfgfq', 'rzanrkdssmmkanqjpfi', 'oputeneprgoowg', 'mp', '', '', 'wgtljbbrmivkcglamemay', 'cbpthtrgrmgfypizi', 'tl', 'tlj', 'xuhs', 'brmivkcglamemayfn', '', 'gtljb'])"
+ qt_select4 "select multi_search_all_positions('arbphzbbecypbzsqsljurtddve', ['arbphzb', 'mnrboimjfijnti', 'cikcrd', 'becypbz', 'z', 'uocmqgnczhdcrvtqrnaxdxjjlhakoszuwc', 'bbe', '', 'bp', 'yhltnexlpdijkdzt', 'jkwjmrckvgmccmmrolqvy', 'vdxmicjmfbtsbqqmqcgtnrvdgaucsgspwg', 'witlfqwvhmmyjrnrzttrikhhsrd', 'pbzsqsljurt'])"
+ qt_select5 "select multi_search_all_positions('aizovxqpzcbbxuhwtiaaqhdqjdei', ['qpzcbbxuhw', 'jugrpglqbm', 'dspwhzpyjohhtizegrnswhjfpdz', 'pzcbbxuh', 'vayzeszlycke', 'i', 'gvrontcpqavsjxtjwzgwxugiyhkhmhq', 'gyzmeroxztgaurmrqwtmsxcqnxaezuoapatvu', 'xqpzc', 'mjiswsvlvlpqrhhptqq', 'iz', 'hmzjxxfjsvcvdpqwtrdrp', 'zovxqpzcbbxuhwtia', 'ai'])"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org