You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/27 14:22:09 UTC
[doris] branch branch-1.2-lts updated: [feature](function)Support negative index for function split_part (#13914)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 687d9d3300 [feature](function)Support negative index for function split_part (#13914)
687d9d3300 is described below

commit 687d9d3300d3d336ddb739094c1b7764907418bf
Author: Yulei-Yang <yu...@gmail.com>
AuthorDate: Mon Dec 12 09:56:09 2022 +0800

    [feature](function)Support negative index for function split_part (#13914)
---
 be/src/exprs/string_functions.cpp                  | 107 ++++++++--
 be/src/vec/functions/function_string.h             | 221 +++++++++++++--------
 be/test/exprs/string_functions_test.cpp            |  49 +++++
 be/test/vec/function/function_string_test.cpp      |  17 --
 .../sql-functions/string-functions/split_part.md   |  33 ++-
 .../sql-functions/string-functions/split_part.md   |  31 ++-
 .../string_functions/test_string_function.out      |  33 +++
 .../string_functions/test_string_function.groovy   |  15 +-
 8 files changed, 380 insertions(+), 126 deletions(-)

diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp
index b980ad4a7f..0db6741f05 100644
--- a/be/src/exprs/string_functions.cpp
+++ b/be/src/exprs/string_functions.cpp
@@ -920,35 +920,98 @@ static int index_of(const uint8_t* source, int source_offset, int source_count,
     return -1;
 }
 
+static int last_index_of(const uint8_t* source, int source_len, const uint8_t* target,
+                         int target_len, int to_index) {
+    if (to_index < 0) {
+        return -1;
+    }
+    if (to_index >= source_len) {
+        to_index = source_len - 1;
+    }
+    if (target_len == 0) {
+        return to_index;
+    }
+    const uint8_t last = target[target_len - 1];
+    int min = target_len;
+    for (int i = to_index; i >= min; i--) {
+        while (i >= min && source[i] != last) {
+            i--; // Look for last character
+        }
+        if (i >= min) { // Found first character, now look at the rest of v2
+            int j = i - 1;
+            int end = j - target_len + 1;
+            for (int k = target_len - 2; j > end && source[j] == target[k];) {
+                j--;
+                k--;
+            }
+            if (j == end) {
+                return i - target_len + 1;
+            }
+        }
+    }
+    return -1;
+}
+
 StringVal StringFunctions::split_part(FunctionContext* context, const StringVal& content,
                                       const StringVal& delimiter, const IntVal& field) {
-    if (content.is_null || delimiter.is_null || field.is_null || field.val <= 0) {
+    if (content.is_null || delimiter.is_null || field.is_null || field.val == 0) {
         return StringVal::null();
     }
-    std::vector<int> find(field.val, -1); //store substring position
-    int from = 0;
-    for (int i = 1; i <= field.val; i++) { // find
-        int last_index = i - 1;
-        find[last_index] =
-                index_of(content.ptr, 0, content.len, delimiter.ptr, 0, delimiter.len, from);
-        from = find[last_index] + delimiter.len;
-        if (find[last_index] == -1) {
-            break;
+
+    if (field.val > 0) {
+        int from = 0;
+        std::vector<int> find(field.val, -1);  //store substring position
+        for (int i = 1; i <= field.val; i++) { // find
+            int last_index = i - 1;
+            find[last_index] =
+                    index_of(content.ptr, 0, content.len, delimiter.ptr, 0, delimiter.len, from);
+            from = find[last_index] + delimiter.len;
+            if (find[last_index] == -1) {
+                break;
+            }
         }
-    }
-    if ((field.val > 1 && find[field.val - 2] == -1) ||
-        (field.val == 1 && find[field.val - 1] == -1)) {
-        // field not find return null
-        return StringVal::null();
-    }
-    int start_pos;
-    if (field.val == 1) { // find need split first part
-        start_pos = 0;
+        if ((field.val > 1 && find[field.val - 2] == -1) ||
+            (field.val == 1 && find[field.val - 1] == -1)) {
+            // field not find return null
+            return StringVal::null();
+        }
+        int start_pos;
+        if (field.val == 1) { // find need split first part
+            start_pos = 0;
+        } else {
+            start_pos = find[field.val - 2] + delimiter.len;
+        }
+        int len = (find[field.val - 1] == -1 ? content.len : find[field.val - 1]) - start_pos;
+        return StringVal(content.ptr + start_pos, len);
     } else {
-        start_pos = find[field.val - 2] + delimiter.len;
+        int to = content.len;
+        int abs_field = -field.val;
+        std::vector<int> find(abs_field, -1);  //store substring position
+        for (int i = 1; i <= abs_field; i++) { // find
+            int last_index = i - 1;
+            find[last_index] =
+                    last_index_of(content.ptr, content.len, delimiter.ptr, delimiter.len, to);
+            to = find[last_index] - delimiter.len;
+            if (find[last_index] == -1) {
+                break;
+            }
+        }
+        if ((abs_field > 1 && find[abs_field - 2] == -1) ||
+            (abs_field == 1 && find[abs_field - 1] == -1)) {
+            // field not find return null
+            return StringVal::null();
+        }
+        int end_pos;
+        if (abs_field == 1) { // find need split first part
+            end_pos = content.len - 1;
+        } else {
+            end_pos = find[abs_field - 2] - 1;
+        }
+        int len =
+                end_pos - (find[abs_field - 1] == -1 ? 0 : find[abs_field - 1] + delimiter.len) + 1;
+
+        return StringVal(content.ptr + end_pos - len + 1, len);
     }
-    int len = (find[field.val - 1] == -1 ? content.len : find[field.val - 1]) - start_pos;
-    return StringVal(content.ptr + start_pos, len);
 }
 
 StringVal StringFunctions::replace(FunctionContext* context, const StringVal& origStr,
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index 01760840b7..73089500a6 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -1234,8 +1234,8 @@ public:
         return make_nullable(std::make_shared<DataTypeString>());
     }
 
-    bool use_default_implementation_for_nulls() const override { return false; }
-    bool use_default_implementation_for_constants() const override { return true; }
+    bool use_default_implementation_for_nulls() const override { return true; }
+    bool use_default_implementation_for_constants() const override { return false; }
 
     Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) override {
@@ -1251,97 +1251,151 @@ public:
         auto& res_chars = res->get_chars();
         res_offsets.resize(input_rows_count);
 
-        size_t argument_size = arguments.size();
-        ColumnPtr argument_columns[argument_size];
-        for (size_t i = 0; i < argument_size; ++i) {
-            argument_columns[i] =
-                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
-            if (auto* nullable = check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
-                // Danger: Here must dispose the null map data first! Because
-                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
-                // of column nullable mem of null map
-                VectorizedUtils::update_null_map(null_map->get_data(),
-                                                 nullable->get_null_map_data());
-                argument_columns[i] = nullable->get_nested_column_ptr();
+        ColumnPtr content_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+
+        if (auto* nullable = check_and_get_column<const ColumnNullable>(*content_column)) {
+            // Danger: Here must dispose the null map data first! Because
+            // argument_columns[0]=nullable->get_nested_column_ptr(); will release the mem
+            // of column nullable mem of null map
+            VectorizedUtils::update_null_map(null_map->get_data(), nullable->get_null_map_data());
+            content_column = nullable->get_nested_column_ptr();
+        }
+
+        for (size_t i = 1; i <= 2; i++) {
+            ColumnPtr columnPtr = remove_nullable(block.get_by_position(arguments[i]).column);
+
+            if (!is_column_const(*columnPtr)) {
+                return Status::RuntimeError("Argument at index {} for function {} must be constant",
+                                            i + 1, get_name());
             }
         }
 
-        auto str_col = assert_cast<const ColumnString*>(argument_columns[0].get());
+        auto str_col = assert_cast<const ColumnString*>(content_column.get());
 
-        auto delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get());
+        const IColumn& delimiter_col = *block.get_by_position(arguments[1]).column;
+        const auto* delimiter_const = typeid_cast<const ColumnConst*>(&delimiter_col);
+        auto delimiter = delimiter_const->get_field().get<String>();
+        int32_t delimiter_size = delimiter.size();
 
-        auto part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get());
-        auto& part_num_col_data = part_num_col->get_data();
+        const IColumn& part_num_col = *block.get_by_position(arguments[2]).column;
+        const auto* part_num_col_const = typeid_cast<const ColumnConst*>(&part_num_col);
+        auto part_number = part_num_col_const->get_field().get<Int32>();
 
-        for (size_t i = 0; i < input_rows_count; ++i) {
-            if (part_num_col_data[i] <= 0) {
-                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
-                continue;
-            }
+        if (part_number >= 0) {
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                if (part_number == 0) {
+                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
+                    continue;
+                }
 
-            auto delimiter = delimiter_col->get_data_at(i);
-            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
-            auto part_number = part_num_col_data[i];
-            auto str = str_col->get_data_at(i);
-            if (delimiter.size == 0) {
-                StringOP::push_empty_string(i, res_chars, res_offsets);
-            } else if (delimiter.size == 1) {
-                // If delimiter is a char, use memchr to split
-                int32_t pre_offset = -1;
-                int32_t offset = -1;
-                int32_t num = 0;
-                while (num < part_number) {
-                    pre_offset = offset;
-                    size_t n = str.size - offset - 1;
-                    const char* pos = reinterpret_cast<const char*>(
-                            memchr(str.data + offset + 1, delimiter_str[0], n));
-                    if (pos != nullptr) {
-                        offset = pos - str.data;
-                        num++;
-                    } else {
-                        offset = str.size;
-                        num = (num == 0) ? 0 : num + 1;
-                        break;
+                auto str = str_col->get_data_at(i);
+                if (delimiter_size == 0) {
+                    StringOP::push_empty_string(i, res_chars, res_offsets);
+                } else if (delimiter_size == 1) {
+                    // If delimiter is a char, use memchr to split
+                    int32_t pre_offset = -1;
+                    int32_t offset = -1;
+                    int32_t num = 0;
+                    while (num < part_number) {
+                        pre_offset = offset;
+                        size_t n = str.size - offset - 1;
+                        const char* pos = reinterpret_cast<const char*>(
+                                memchr(str.data + offset + 1, delimiter[0], n));
+                        if (pos != nullptr) {
+                            offset = pos - str.data;
+                            num++;
+                        } else {
+                            offset = str.size;
+                            num = (num == 0) ? 0 : num + 1;
+                            break;
+                        }
                     }
-                }
 
-                if (num == part_number) {
-                    StringOP::push_value_string(
-                            std::string_view {
-                                    reinterpret_cast<const char*>(str.data + pre_offset + 1),
-                                    (size_t)offset - pre_offset - 1},
-                            i, res_chars, res_offsets);
+                    if (num == part_number) {
+                        StringOP::push_value_string(
+                                std::string_view {
+                                        reinterpret_cast<const char*>(str.data + pre_offset + 1),
+                                        (size_t)offset - pre_offset - 1},
+                                i, res_chars, res_offsets);
+                    } else {
+                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
+                    }
                 } else {
-                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
-                }
-            } else {
-                // If delimiter is a string, use memmem to split
-                int32_t pre_offset = -delimiter.size;
-                int32_t offset = -delimiter.size;
-                int32_t num = 0;
-                while (num < part_number) {
-                    pre_offset = offset;
-                    size_t n = str.size - offset - delimiter.size;
-                    char* pos = reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size,
-                                                               n, delimiter.data, delimiter.size));
-                    if (pos != nullptr) {
-                        offset = pos - str.data;
-                        num++;
+                    // If delimiter is a string, use memmem to split
+                    int32_t pre_offset = -delimiter_size;
+                    int32_t offset = pre_offset;
+                    int32_t num = 0;
+                    while (num < part_number) {
+                        pre_offset = offset;
+                        size_t n = str.size - offset - delimiter_size;
+                        char* pos = reinterpret_cast<char*>(
+                                memmem(str.data + offset + delimiter_size, n, delimiter.c_str(),
+                                       delimiter_size));
+                        if (pos != nullptr) {
+                            offset = pos - str.data;
+                            num++;
+                        } else {
+                            offset = str.size;
+                            num = (num == 0) ? 0 : num + 1;
+                            break;
+                        }
+                    }
+
+                    if (num == part_number) {
+                        StringOP::push_value_string(
+                                std::string_view {reinterpret_cast<const char*>(
+                                                          str.data + pre_offset + delimiter_size),
+                                                  (size_t)offset - pre_offset - delimiter_size},
+                                i, res_chars, res_offsets);
                     } else {
-                        offset = str.size;
-                        num = (num == 0) ? 0 : num + 1;
-                        break;
+                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
                     }
                 }
-
-                if (num == part_number) {
-                    StringOP::push_value_string(
-                            std::string_view {reinterpret_cast<const char*>(str.data + pre_offset +
-                                                                            delimiter.size),
-                                              (size_t)offset - pre_offset - delimiter.size},
-                            i, res_chars, res_offsets);
+            }
+        } else {
+            part_number = -part_number;
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                if (delimiter_size == 0) {
+                    StringOP::push_empty_string(i, res_chars, res_offsets);
                 } else {
-                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
+                    auto str = str_col->get_data_at(i);
+                    auto str_str = str.to_string();
+                    int32_t offset = str.size;
+                    int32_t pre_offset = offset;
+                    int32_t num = 0;
+                    auto substr = str_str;
+                    while (num <= part_number && offset >= 0) {
+                        offset = (int)substr.rfind(delimiter, offset);
+                        if (offset != -1) {
+                            if (++num == part_number) {
+                                break;
+                            }
+                            pre_offset = offset;
+                            offset = offset - 1;
+                            substr = str_str.substr(0, pre_offset);
+                        } else {
+                            break;
+                        }
+                    }
+                    num = (offset == -1 && num != 0) ? num + 1 : num;
+
+                    if (num == part_number) {
+                        if (offset == -1) {
+                            StringOP::push_value_string(
+                                    std::string_view {reinterpret_cast<const char*>(str.data),
+                                                      (size_t)pre_offset},
+                                    i, res_chars, res_offsets);
+                        } else {
+                            StringOP::push_value_string(
+                                    std::string_view {str_str.substr(
+                                            offset + delimiter_size,
+                                            (size_t)pre_offset - offset - delimiter_size)},
+                                    i, res_chars, res_offsets);
+                        }
+                    } else {
+                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
+                    }
                 }
             }
         }
@@ -1717,10 +1771,11 @@ public:
             if (!success) {
                 // url is malformed, or url_part is invalid.
                 if (url_part == UrlParser::INVALID) {
-                    return Status::RuntimeError(
-                            "Invalid URL part: {}\n{}", std::string(part.data, part.size),
-                            "(Valid URL parts are 'PROTOCOL', 'HOST', 'PATH', 'REF', 'AUTHORITY', "
-                            "'FILE', 'USERINFO', 'PORT' and 'QUERY')");
+                    return Status::RuntimeError("Invalid URL part: {}\n{}",
+                                                std::string(part.data, part.size),
+                                                "(Valid URL parts are 'PROTOCOL', 'HOST', "
+                                                "'PATH', 'REF', 'AUTHORITY', "
+                                                "'FILE', 'USERINFO', 'PORT' and 'QUERY')");
                 } else {
                     StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
                     continue;
diff --git a/be/test/exprs/string_functions_test.cpp b/be/test/exprs/string_functions_test.cpp
index fa6d912b4e..89c58ecfb3 100644
--- a/be/test/exprs/string_functions_test.cpp
+++ b/be/test/exprs/string_functions_test.cpp
@@ -187,6 +187,55 @@ TEST_F(StringFunctionsTest, split_part) {
             AnyValUtil::from_string(ctx, std::string("#123")),
             StringFunctions::split_part(context, StringVal("abc###123###234"), StringVal("##"), 2));
 
+    EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("234")),
+              StringFunctions::split_part(context, StringVal("abc###123###234"), StringVal("##"),
+                                          -1));
+
+    EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("123#")),
+              StringFunctions::split_part(context, StringVal("abc###123###234"), StringVal("##"),
+                                          -2));
+
+    EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("abc#")),
+              StringFunctions::split_part(context, StringVal("abc###123###234"), StringVal("##"),
+                                          -3));
+
+    EXPECT_EQ(StringVal::null(), StringFunctions::split_part(context, StringVal("abc###123###234"),
+                                                             StringVal("##"), -4));
+
+    EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("234")),
+              StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -1));
+
+    EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")),
+              StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -2));
+
+    EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("123")),
+              StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -3));
+
+    EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("abc")),
+              StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -4));
+
+    EXPECT_EQ(StringVal::null(),
+              StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -5));
+
+    EXPECT_EQ(StringVal::null(), StringFunctions::split_part(context, StringVal("abc#123##234"),
+                                                             StringVal("#"), IntVal::null()));
+
+    EXPECT_EQ(StringVal::null(), StringFunctions::split_part(context, StringVal("abc#123##234"),
+                                                             StringVal::null(), -1));
+
+    EXPECT_EQ(
+            AnyValUtil::from_string(ctx, std::string("")),
+            StringFunctions::split_part(context, StringVal("2019年9月-12月"), StringVal("月"), -1));
+    EXPECT_EQ(
+            AnyValUtil::from_string(ctx, std::string("-12")),
+            StringFunctions::split_part(context, StringVal("2019年9月-12月"), StringVal("月"), -2));
+
+    EXPECT_EQ(
+            AnyValUtil::from_string(ctx, std::string("2019年9")),
+            StringFunctions::split_part(context, StringVal("2019年9月-12月"), StringVal("月"), -3));
+
+    EXPECT_EQ(StringVal::null(), StringFunctions::split_part(context, StringVal("2019年9月-12月"),
+                                                             StringVal("月"), -4));
     delete context;
 }
 
diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp
index e996bb13f9..c499718ff1 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -571,23 +571,6 @@ TEST(function_string_test, function_find_in_set_test) {
     check_function<DataTypeInt32, true>(func_name, input_types, data_set);
 }
 
-TEST(function_string_test, function_string_splitpart_test) {
-    std::string func_name = "split_part";
-    InputTypeSet input_types = {TypeIndex::String, TypeIndex::String, TypeIndex::Int32};
-
-    DataSet data_set = {
-            {{std::string("prefix_string1"), std::string("_"), 2}, std::string("string1")},
-            {{std::string("prefix__string2"), std::string("__"), 2}, std::string("string2")},
-            {{std::string("prefix__string2"), std::string("_"), 2}, std::string("")},
-            {{std::string("prefix_string2"), std::string("__"), 1}, Null()},
-            {{Null(), std::string("__"), 1}, Null()},
-            {{std::string("prefix_string"), Null(), 1}, Null()},
-            {{std::string("prefix_string"), std::string("__"), Null()}, Null()},
-            {{std::string("prefix_string"), std::string("__"), -1}, Null()}};
-
-    check_function<DataTypeString, true>(func_name, input_types, data_set);
-}
-
 TEST(function_string_test, function_md5sum_test) {
     std::string func_name = "md5sum";
 
diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/split_part.md b/docs/en/docs/sql-manual/sql-functions/string-functions/split_part.md
index 67a7bc721a..818cf6c9dc 100644
--- a/docs/en/docs/sql-manual/sql-functions/string-functions/split_part.md
+++ b/docs/en/docs/sql-manual/sql-functions/string-functions/split_part.md
@@ -31,7 +31,9 @@ under the License.
 `VARCHAR split party (VARCHAR content, VARCHAR delimiter, INT field)`
 
 
-Returns the specified partition (counting from the beginning) by splitting the string according to the partitioner.
+Returns the specified partition by splitting the string according to the delimiter. If field is positive, splitting and counting from the beginning of content, otherwise from the ending.
+
+`delimiter` and `field` parameter should be constant.
 
 ### example
 
@@ -64,6 +66,35 @@ mysql> select split_part("abca", "a", 1);
 +----------------------------+
 |                            |
 +----------------------------+
+
+mysql> select split_part("prefix_string", "_", -1);
++--------------------------------------+
+| split_part('prefix_string', '_', -1) |
++--------------------------------------+
+| string                               |
++--------------------------------------+
+
+
+mysql> select split_part("prefix_string", "_", -2);
++--------------------------------------+
+| split_part('prefix_string', '_', -2) |
++--------------------------------------+
+| prefix                               |
++--------------------------------------+
+
+mysql> select split_part("abc##123###234", "##", -1);
++----------------------------------------+
+| split_part('abc##123###234', '##', -1) |
++----------------------------------------+
+| 234                                    |
++----------------------------------------+
+
+mysql> select split_part("abc##123###234", "##", -2);
++----------------------------------------+
+| split_part('abc##123###234', '##', -2) |
++----------------------------------------+
+| 123#                                   |
++----------------------------------------+
 ```
 ### keywords
     SPLIT_PART,SPLIT,PART
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_part.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_part.md
index 00e5620f53..0291ae29ca 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_part.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/split_part.md
@@ -31,7 +31,8 @@ under the License.
 `VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field)`
 
 
-根据分割符拆分字符串, 返回指定的分割部分(从一开始计数)。
+根据分割符拆分字符串, 返回指定的分割部分(从一或负一开始计数)。field字段支持负数，代表从右往左倒着截取并取数。
+`delimiter` 和 `field` 参数需要是常量, 不支持变量。
 
 ### example
 
@@ -64,6 +65,34 @@ mysql> select split_part("abca", "a", 1);
 +----------------------------+
 |                            |
 +----------------------------+
+
+mysql> select split_part("prefix_string", "_", -1);
++--------------------------------------+
+| split_part('prefix_string', '_', -1) |
++--------------------------------------+
+| string                               |
++--------------------------------------+
+
+mysql> select split_part("prefix_string", "_", -2);
++--------------------------------------+
+| split_part('prefix_string', '_', -2) |
++--------------------------------------+
+| prefix                               |
++--------------------------------------+
+
+mysql> select split_part("abc##123###234", "##", -1);
++----------------------------------------+
+| split_part('abc##123###234', '##', -1) |
++----------------------------------------+
+| 234                                    |
++----------------------------------------+
+
+mysql> select split_part("abc##123###234", "##", -2);
++----------------------------------------+
+| split_part('abc##123###234', '##', -2) |
++----------------------------------------+
+| 123#                                   |
++----------------------------------------+
 ```
 ### keywords
     SPLIT_PART,SPLIT,PART
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
index 8b2a4c8bb0..2ef20aff92 100644
--- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
@@ -224,6 +224,39 @@ hello
 -- !sql --
 world
 
+-- !sql --
+\\N
+
+-- !sql --
+world
+
+-- !sql --
+hello
+
+-- !sql --
+\\N
+
+-- !sql --
+\\N
+
+-- !sql --
+abc
+
+-- !sql --
+#xyz
+
+-- !sql --
+\\N
+
+-- !sql --
+xyz
+
+-- !sql --
+123#
+
+-- !sql --
+\\N
+
 -- !sql --
 true
 
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
index fb0df8f38f..7c78210608 100644
--- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
@@ -117,8 +117,19 @@ suite("test_string_function") {
 
     qt_sql "SELECT REVERSE('hello');"
 
-    qt_sql "select split_part(\"hello world\", \" \", 1);"
-    qt_sql "select split_part(\"hello world\", \" \", 2);"
+    qt_sql "select split_part('hello world', ' ', 1)"
+    qt_sql "select split_part('hello world', ' ', 2)"
+    qt_sql "select split_part('hello world', ' ', 0)"
+    qt_sql "select split_part('hello world', ' ', -1)"
+    qt_sql "select split_part('hello world', ' ', -2)"
+    qt_sql "select split_part('hello world', ' ', -3)"
+    qt_sql "select split_part('abc##123###xyz', '##', 0)"
+    qt_sql "select split_part('abc##123###xyz', '##', 1)"
+    qt_sql "select split_part('abc##123###xyz', '##', 3)"
+    qt_sql "select split_part('abc##123###xyz', '##', 5)"
+    qt_sql "select split_part('abc##123###xyz', '##', -1)"
+    qt_sql "select split_part('abc##123###xyz', '##', -2)"
+    qt_sql "select split_part('abc##123###xyz', '##', -4)"
 
     qt_sql "select starts_with(\"hello world\",\"hello\");"
     qt_sql "select starts_with(\"hello world\",\"world\");"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org