You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/06/30 00:12:34 UTC

[doris] branch master updated: [feature-wip](array-type) add function arrays_overlap (#10233)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ec6620ae3e [feature-wip](array-type) add function arrays_overlap (#10233)
ec6620ae3e is described below

commit ec6620ae3e190774dfeb34a5041ff81ab8ba4654
Author: camby <10...@qq.com>
AuthorDate: Thu Jun 30 08:12:29 2022 +0800

    [feature-wip](array-type) add function arrays_overlap (#10233)
---
 be/src/vec/CMakeLists.txt                          |   2 +
 be/src/vec/common/aggregation_common.h             |   3 -
 be/src/vec/common/hash_table/hash.h                |   3 +
 .../functions/array/function_array_register.cpp    |   2 +
 .../vec/functions/array/function_array_utils.cpp   |  51 +++++
 ...n_array_register.cpp => function_array_utils.h} |  26 +--
 ...ay_register.cpp => function_arrays_overlap.cpp} |  17 +-
 .../vec/functions/array/function_arrays_overlap.h  | 248 +++++++++++++++++++++
 be/src/vec/io/io_helper.h                          |   1 +
 be/test/CMakeLists.txt                             |   1 +
 .../vec/function/function_arrays_overlap_test.cpp  | 138 ++++++++++++
 .../array-functions/arrays_overlap.md              |  66 ++++++
 .../array-functions/arrays_overlap.md              |  66 ++++++
 gensrc/script/doris_builtins_functions.py          |  13 ++
 .../array_functions/test_array_functions.out       |   5 +
 .../array_functions/test_array_functions.groovy    |  10 +-
 16 files changed, 618 insertions(+), 34 deletions(-)

diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index b32142ee80..c51f670ab6 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -134,6 +134,8 @@ set(VEC_FILES
   functions/array/function_array_register.cpp
   functions/array/function_array_size.cpp
   functions/array/function_array_aggregation.cpp
+  functions/array/function_array_utils.cpp
+  functions/array/function_arrays_overlap.cpp
   exprs/table_function/vexplode_json_array.cpp
   functions/math.cpp
   functions/function_bitmap.cpp
diff --git a/be/src/vec/common/aggregation_common.h b/be/src/vec/common/aggregation_common.h
index d6a4d30df5..ee3ef5ce5b 100644
--- a/be/src/vec/common/aggregation_common.h
+++ b/be/src/vec/common/aggregation_common.h
@@ -32,9 +32,6 @@
 #include "vec/common/string_ref.h"
 #include "vec/common/uint128.h"
 
-template <>
-struct DefaultHash<StringRef> : public StringRefHash {};
-
 namespace doris::vectorized {
 
 using Sizes = std::vector<size_t>;
diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h
index 3bb1d94269..fb47657809 100644
--- a/be/src/vec/common/hash_table/hash.h
+++ b/be/src/vec/common/hash_table/hash.h
@@ -91,6 +91,9 @@ struct DefaultHash<T, std::enable_if_t<std::is_arithmetic_v<T>>> {
     size_t operator()(T key) const { return default_hash64<T>(key); }
 };
 
+template <>
+struct DefaultHash<StringRef> : public StringRefHash {}; // DefaultHash for StringRef keys simply reuses StringRefHash
+
 template <typename T>
 struct HashCRC32;
 
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp
index 833bc9fbd8..cb5a091c91 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_array_register.cpp
@@ -26,12 +26,14 @@ void register_function_array_element(SimpleFunctionFactory&);
 void register_function_array_index(SimpleFunctionFactory&);
 void register_function_array_size(SimpleFunctionFactory&);
 void register_function_array_aggregation(SimpleFunctionFactory&);
+void register_function_arrays_overlap(SimpleFunctionFactory&);
 
 void register_function_array(SimpleFunctionFactory& factory) {
     register_function_array_element(factory);
     register_function_array_index(factory);
     register_function_array_size(factory);
     register_function_array_aggregation(factory);
+    register_function_arrays_overlap(factory);
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_utils.cpp b/be/src/vec/functions/array/function_array_utils.cpp
new file mode 100644
index 0000000000..582bb02c33
--- /dev/null
+++ b/be/src/vec/functions/array/function_array_utils.cpp
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/array/function_array_utils.h"
+
+#include "vec/columns/column_nullable.h"
+
+namespace doris::vectorized {
+
+bool extract_column_array_info(const IColumn& src, ColumnArrayExecutionData& data) { // fills `data` from `src`; returns false when `src` is not a (possibly nullable) ColumnArray
+    const IColumn* array_col = &src;
+    // if the outer column is nullable, record its null map and unwrap the column it wraps
+    if (src.is_nullable()) {
+        const auto& null_col = reinterpret_cast<const ColumnNullable&>(src);
+        data.array_nullmap_data = null_col.get_null_map_data().data();
+        array_col = null_col.get_nested_column_ptr().get();
+    }
+
+    // the (unwrapped) column must be a ColumnArray, otherwise report failure to the caller
+    data.array_col = check_and_get_column<ColumnArray>(array_col);
+    if (!data.array_col) {
+        return false;
+    }
+
+    // record the array offsets and the nested element column
+    data.offsets_ptr = &data.array_col->get_offsets();
+    data.nested_col = &data.array_col->get_data();
+    // if the nested element column is nullable, record its null map and unwrap it as well
+    if (data.nested_col->is_nullable()) {
+        const auto& nested_null_col = reinterpret_cast<const ColumnNullable&>(*data.nested_col);
+        data.nested_nullmap_data = nested_null_col.get_null_map_data().data();
+        data.nested_col = nested_null_col.get_nested_column_ptr().get();
+    }
+    return true;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_utils.h
similarity index 55%
copy from be/src/vec/functions/array/function_array_register.cpp
copy to be/src/vec/functions/array/function_array_utils.h
index 833bc9fbd8..e7173489e0 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_array_utils.h
@@ -14,24 +14,22 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/registerFunctionsArray.cpp
-// and modified by Doris
+#pragma once
 
-#include "vec/functions/simple_function_factory.h"
+#include "vec/columns/column_array.h"
+#include "vec/data_types/data_type_array.h"
 
 namespace doris::vectorized {
 
-void register_function_array_element(SimpleFunctionFactory&);
-void register_function_array_index(SimpleFunctionFactory&);
-void register_function_array_size(SimpleFunctionFactory&);
-void register_function_array_aggregation(SimpleFunctionFactory&);
+struct ColumnArrayExecutionData { // non-owning views over one array argument, filled by extract_column_array_info
+public:
+    const UInt8* array_nullmap_data = nullptr; // null map of the array column itself; stays nullptr when the source column is not nullable
+    const ColumnArray* array_col = nullptr; // the ColumnArray, after unwrapping a nullable wrapper if there was one
+    const ColumnArray::Offsets* offsets_ptr = nullptr; // offsets of array_col, taken from get_offsets()
+    const UInt8* nested_nullmap_data = nullptr; // null map of the elements; stays nullptr when the nested column is not nullable
+    const IColumn* nested_col = nullptr; // nested element column, after unwrapping a nullable wrapper if there was one
+};
 
-void register_function_array(SimpleFunctionFactory& factory) {
-    register_function_array_element(factory);
-    register_function_array_index(factory);
-    register_function_array_size(factory);
-    register_function_array_aggregation(factory);
-}
+bool extract_column_array_info(const IColumn& src, ColumnArrayExecutionData& data);
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_arrays_overlap.cpp
similarity index 58%
copy from be/src/vec/functions/array/function_array_register.cpp
copy to be/src/vec/functions/array/function_arrays_overlap.cpp
index 833bc9fbd8..e4e54e9135 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_arrays_overlap.cpp
@@ -14,24 +14,15 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/registerFunctionsArray.cpp
-// and modified by Doris
+
+#include "vec/functions/array/function_arrays_overlap.h"
 
 #include "vec/functions/simple_function_factory.h"
 
 namespace doris::vectorized {
 
-void register_function_array_element(SimpleFunctionFactory&);
-void register_function_array_index(SimpleFunctionFactory&);
-void register_function_array_size(SimpleFunctionFactory&);
-void register_function_array_aggregation(SimpleFunctionFactory&);
-
-void register_function_array(SimpleFunctionFactory& factory) {
-    register_function_array_element(factory);
-    register_function_array_index(factory);
-    register_function_array_size(factory);
-    register_function_array_aggregation(factory);
+void register_function_arrays_overlap(SimpleFunctionFactory& factory) { // registers FunctionArraysOverlap with the given factory
+    factory.register_function<FunctionArraysOverlap>();
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_arrays_overlap.h b/be/src/vec/functions/array/function_arrays_overlap.h
new file mode 100644
index 0000000000..b1e10449fa
--- /dev/null
+++ b/be/src/vec/functions/array/function_arrays_overlap.h
@@ -0,0 +1,248 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <string_view>
+
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_string.h"
+#include "vec/common/hash_table/hash_set.h"
+#include "vec/common/string_ref.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/array/function_array_utils.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+template <typename T>
+struct OverlapSetImpl { // per-row helper: collects the elements of one array in a hash set, then probes it with another array
+    using ElementNativeType = typename NativeType<typename T::value_type>::Type;
+    using Set = HashSetWithStackMemory<ElementNativeType, DefaultHash<ElementNativeType>, 4>; // NOTE(review): the trailing 4 presumably sizes the initial stack storage - confirm
+    Set set;
+    void insert_array(const IColumn* column, size_t start, size_t size) { // inserts elements [start, start + size) of `column` into the set
+        const auto& vec = assert_cast<const T&>(*column).get_data(); // `column` is expected to be of concrete type T
+        for (size_t i = start; i < start + size; ++i) {
+            set.insert(vec[i]);
+        }
+    }
+    bool find_any(const IColumn* column, size_t start, size_t size) { // true as soon as one element of [start, start + size) is found in the set
+        const auto& vec = assert_cast<const T&>(*column).get_data();
+        for (size_t i = start; i < start + size; ++i) {
+            if (set.find(vec[i])) {
+                return true;
+            }
+        }
+        return false; // no element of the probed range is in the set
+    }
+};
+
+template <>
+struct OverlapSetImpl<ColumnString> { // string specialization: elements are read through get_data_at() and stored as StringRef
+    using Set = HashSetWithStackMemory<StringRef, DefaultHash<StringRef>, 4>;
+    Set set;
+    void insert_array(const IColumn* column, size_t start, size_t size) { // inserts elements [start, start + size) of `column` into the set
+        for (size_t i = start; i < start + size; ++i) {
+            set.insert(column->get_data_at(i)); // NOTE(review): presumably the StringRef points into the column buffer, so the column must outlive the set - confirm
+        }
+    }
+    bool find_any(const IColumn* column, size_t start, size_t size) { // true as soon as one element of [start, start + size) is found in the set
+        for (size_t i = start; i < start + size; ++i) {
+            if (set.find(column->get_data_at(i))) {
+                return true;
+            }
+        }
+        return false; // no element of the probed range is in the set
+    }
+};
+
+class FunctionArraysOverlap : public IFunction { // arrays_overlap(left, right): 1 if the arrays share an element, 0 if not, NULL per _execute_nullable
+public:
+    static constexpr auto name = "arrays_overlap";
+    static FunctionPtr create() { return std::make_shared<FunctionArraysOverlap>(); }
+
+    /// Get function name.
+    String get_name() const override { return name; }
+
+    bool use_default_implementation_for_nulls() const override { return false; } // NULL inputs are handled explicitly in execute_impl through _execute_nullable
+
+    bool is_variadic() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { // both arguments must be arrays of the same type (only DCHECKed here)
+        auto left_data_type = remove_nullable(arguments[0]);
+        auto right_data_type = remove_nullable(arguments[1]);
+        DCHECK(is_array(left_data_type)) << arguments[0]->get_name();
+        DCHECK(is_array(right_data_type)) << arguments[1]->get_name();
+        DCHECK(left_data_type->equals(*right_data_type))
+                << "data type " << arguments[0]->get_name() << " not equal with "
+                << arguments[1]->get_name();
+        return make_nullable(std::make_shared<DataTypeUInt8>()); // the result type is always a nullable UInt8
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto left_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        auto right_column =
+                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        ColumnArrayExecutionData left_exec_data;
+        ColumnArrayExecutionData right_exec_data;
+
+        Status ret = Status::RuntimeError( // default result: stays an unsupported-types error unless a dispatch branch below overwrites it
+                fmt::format("execute failed, unsupported types for function {}({}, {})", get_name(),
+                            block.get_by_position(arguments[0]).type->get_name(),
+                            block.get_by_position(arguments[1]).type->get_name()));
+
+        // extract array info from both arguments; return the error above if either one is not an array column
+        if (!extract_column_array_info(*left_column, left_exec_data) ||
+            !extract_column_array_info(*right_column, right_exec_data)) {
+            return ret;
+        }
+
+        // prepare return column: result values and result null map, both initialized to 0
+        auto dst_nested_col = ColumnVector<UInt8>::create(input_rows_count, 0);
+        auto dst_null_map = ColumnVector<UInt8>::create(input_rows_count, 0);
+        UInt8* dst_null_map_data = dst_null_map->get_data().data();
+
+        // if either array is NULL, or any element of either array is NULL, the result row is NULL
+        RETURN_IF_ERROR(_execute_nullable(left_exec_data, dst_null_map_data));
+        RETURN_IF_ERROR(_execute_nullable(right_exec_data, dst_null_map_data));
+
+        // execute overlap check, dispatching on the nested column type of the left argument
+        if (left_exec_data.nested_col->is_column_string()) {
+            ret = _execute_internal<ColumnString>(left_exec_data, right_exec_data,
+                                                  dst_null_map_data,
+                                                  dst_nested_col->get_data().data());
+        } else if (left_exec_data.nested_col->is_date_type()) {
+            ret = _execute_internal<ColumnDate>(left_exec_data, right_exec_data, dst_null_map_data,
+                                                dst_nested_col->get_data().data());
+        } else if (left_exec_data.nested_col->is_datetime_type()) {
+            ret = _execute_internal<ColumnDateTime>(left_exec_data, right_exec_data,
+                                                    dst_null_map_data,
+                                                    dst_nested_col->get_data().data());
+        } else if (left_exec_data.nested_col->is_numeric()) {
+            if (check_column<ColumnUInt8>(*left_exec_data.nested_col)) {
+                ret = _execute_internal<ColumnUInt8>(left_exec_data, right_exec_data,
+                                                     dst_null_map_data,
+                                                     dst_nested_col->get_data().data());
+            } else if (check_column<ColumnInt8>(*left_exec_data.nested_col)) {
+                ret = _execute_internal<ColumnInt8>(left_exec_data, right_exec_data,
+                                                    dst_null_map_data,
+                                                    dst_nested_col->get_data().data());
+            } else if (check_column<ColumnInt16>(*left_exec_data.nested_col)) {
+                ret = _execute_internal<ColumnInt16>(left_exec_data, right_exec_data,
+                                                     dst_null_map_data,
+                                                     dst_nested_col->get_data().data());
+            } else if (check_column<ColumnInt32>(*left_exec_data.nested_col)) {
+                ret = _execute_internal<ColumnInt32>(left_exec_data, right_exec_data,
+                                                     dst_null_map_data,
+                                                     dst_nested_col->get_data().data());
+            } else if (check_column<ColumnInt64>(*left_exec_data.nested_col)) {
+                ret = _execute_internal<ColumnInt64>(left_exec_data, right_exec_data,
+                                                     dst_null_map_data,
+                                                     dst_nested_col->get_data().data());
+            } else if (check_column<ColumnInt128>(*left_exec_data.nested_col)) {
+                ret = _execute_internal<ColumnInt128>(left_exec_data, right_exec_data,
+                                                      dst_null_map_data,
+                                                      dst_nested_col->get_data().data());
+            } else if (check_column<ColumnFloat32>(*left_exec_data.nested_col)) {
+                ret = _execute_internal<ColumnFloat32>(left_exec_data, right_exec_data,
+                                                       dst_null_map_data,
+                                                       dst_nested_col->get_data().data());
+            } else if (check_column<ColumnFloat64>(*left_exec_data.nested_col)) {
+                ret = _execute_internal<ColumnFloat64>(left_exec_data, right_exec_data,
+                                                       dst_null_map_data,
+                                                       dst_nested_col->get_data().data());
+            }
+        } else if (left_exec_data.nested_col->is_column_decimal()) {
+            if (check_column<ColumnDecimal128>(*left_exec_data.nested_col)) { // ColumnDecimal128 is the only decimal column handled here
+                ret = _execute_internal<ColumnDecimal128>(left_exec_data, right_exec_data,
+                                                          dst_null_map_data,
+                                                          dst_nested_col->get_data().data());
+            }
+        }
+
+        if (ret == Status::OK()) { // the result column is only written into the block on success
+            block.replace_by_position(result, ColumnNullable::create(std::move(dst_nested_col),
+                                                                     std::move(dst_null_map)));
+        }
+
+        return ret;
+    }
+
+private:
+    Status _execute_nullable(const ColumnArrayExecutionData& data, UInt8* dst_nullmap_data) { // sets dst_nullmap_data[row] to 1 for NULL rows; never resets an entry; always returns OK
+        for (ssize_t row = 0; row < data.offsets_ptr->size(); ++row) { // NOTE(review): signed row compared against size() - confirm this is intended
+            if (dst_nullmap_data[row]) {
+                continue;
+            }
+
+            if (data.array_nullmap_data && data.array_nullmap_data[row]) { // the whole array is NULL in this row
+                dst_nullmap_data[row] = 1;
+                continue;
+            }
+
+            // if any element inside the array is NULL, the result row is NULL
+            if (data.nested_nullmap_data) {
+                ssize_t start = (*data.offsets_ptr)[row - 1]; // NOTE(review): for row 0 this reads index -1; presumably the Offsets container guarantees a readable zero there - confirm
+                ssize_t size = (*data.offsets_ptr)[row] - start;
+                for (ssize_t i = start; i < start + size; ++i) {
+                    if (data.nested_nullmap_data[i]) {
+                        dst_nullmap_data[row] = 1;
+                        break;
+                    }
+                }
+            }
+        }
+        return Status::OK();
+    }
+
+    template <typename T>
+    Status _execute_internal(const ColumnArrayExecutionData& left_data,
+                             const ColumnArrayExecutionData& right_data,
+                             const UInt8* dst_nullmap_data, UInt8* dst_data) { // writes the overlap result of every non-NULL row into dst_data; always returns OK
+        using ExecutorImpl = OverlapSetImpl<T>;
+        for (ssize_t row = 0; row < left_data.offsets_ptr->size(); ++row) {
+            if (dst_nullmap_data[row]) { // rows already marked NULL are skipped
+                continue;
+            }
+
+            ssize_t left_start = (*left_data.offsets_ptr)[row - 1]; // NOTE(review): same index -1 access for row 0 as in _execute_nullable - confirm
+            ssize_t left_size = (*left_data.offsets_ptr)[row] - left_start;
+            ssize_t right_start = (*right_data.offsets_ptr)[row - 1];
+            ssize_t right_size = (*right_data.offsets_ptr)[row] - right_start;
+            if (left_size == 0 || right_size == 0) { // an empty array on either side cannot overlap
+                dst_data[row] = 0;
+                continue;
+            }
+
+            ExecutorImpl impl; // a fresh set is built for every row
+            if (right_size < left_size) { // build the set from the smaller array and probe it with the larger one
+                impl.insert_array(right_data.nested_col, right_start, right_size);
+                dst_data[row] = impl.find_any(left_data.nested_col, left_start, left_size);
+            } else {
+                impl.insert_array(left_data.nested_col, left_start, left_size);
+                dst_data[row] = impl.find_any(right_data.nested_col, right_start, right_size);
+            }
+        }
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
index 8425c0ec4d..01c89ed155 100644
--- a/be/src/vec/io/io_helper.h
+++ b/be/src/vec/io/io_helper.h
@@ -26,6 +26,7 @@
 #include "util/string_parser.hpp"
 #include "vec/common/arena.h"
 #include "vec/common/exception.h"
+#include "vec/common/string_buffer.hpp"
 #include "vec/common/string_ref.h"
 #include "vec/common/uint128.h"
 #include "vec/core/types.h"
diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt
index b245a343d2..879b6f7b24 100644
--- a/be/test/CMakeLists.txt
+++ b/be/test/CMakeLists.txt
@@ -341,6 +341,7 @@ set(VEC_TEST_FILES
     vec/function/function_array_element_test.cpp
     vec/function/function_array_index_test.cpp
     vec/function/function_array_size_test.cpp
+    vec/function/function_arrays_overlap_test.cpp
     vec/function/function_bitmap_test.cpp
     vec/function/function_comparison_test.cpp
     vec/function/function_hash_test.cpp
diff --git a/be/test/vec/function/function_arrays_overlap_test.cpp b/be/test/vec/function/function_arrays_overlap_test.cpp
new file mode 100644
index 0000000000..053205e403
--- /dev/null
+++ b/be/test/vec/function/function_arrays_overlap_test.cpp
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include <string>
+
+#include "function_test_util.h"
+#include "runtime/tuple_row.h"
+#include "util/url_coding.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+TEST(function_arrays_overlap_test, arrays_overlap) { // runs arrays_overlap over several element types through check_function
+    std::string func_name = "arrays_overlap";
+    Array empty_arr; // empty array shared by all cases below
+
+    // arrays_overlap(Array<Int32>, Array<Int32>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32, TypeIndex::Array,
+                                    TypeIndex::Int32};
+
+        Array vec1 = {Int32(1), Int32(2), Int32(3)};
+        Array vec2 = {Int32(3)};
+        Array vec3 = {Int32(4), Int32(5)};
+        DataSet data_set = {{{vec1, vec2}, UInt8(1)}, // common element 3
+                            {{vec1, vec3}, UInt8(0)}, // no common element
+                            {{Null(), vec1}, Null()}, // NULL array argument gives a NULL result
+                            {{empty_arr, vec1}, UInt8(0)}}; // an empty array overlaps nothing
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<Int128>, Array<Int128>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int128, TypeIndex::Array,
+                                    TypeIndex::Int128};
+
+        Array vec1 = {Int128(11111111111LL), Int128(22222LL), Int128(333LL)};
+        Array vec2 = {Int128(11111111111LL)};
+        DataSet data_set = {
+                {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<Float64>, Array<Float64>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Float64, TypeIndex::Array,
+                                    TypeIndex::Float64};
+
+        Array vec1 = {double(1.2345), double(2.222), double(3.0)};
+        Array vec2 = {double(1.2345)};
+        DataSet data_set = {
+                {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<Date>, Array<Date>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Array,
+                                    TypeIndex::Date};
+
+        Array vec1 = {str_to_date_time("2022-01-02", false), str_to_date_time("", false),
+                      str_to_date_time("2022-07-08", false)};
+        Array vec2 = {str_to_date_time("2022-01-02", false)};
+        DataSet data_set = {
+                {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<DateTime>, Array<DateTime>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::Array,
+                                    TypeIndex::DateTime};
+
+        Array vec1 = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time(""),
+                      str_to_date_time("2022-07-08 00:00:00")};
+        Array vec2 = {str_to_date_time("2022-01-02 00:00:00")};
+        Array vec3 = {str_to_date_time("")};
+        DataSet data_set = {{{vec1, vec2}, UInt8(1)},
+                            {{vec1, vec3}, UInt8(1)}, // both sides contain the datetime built from the empty string
+                            {{Null(), vec1}, Null()},
+                            {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<Decimal128>, Array<Decimal128>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128, TypeIndex::Array,
+                                    TypeIndex::Decimal128};
+
+        Array vec1 = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67),
+                      ut_type::DECIMALFIELD(0.0)};
+        Array vec2 = {ut_type::DECIMALFIELD(17014116.67)};
+        DataSet data_set = {
+                {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<String>, Array<String>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Array,
+                                    TypeIndex::String};
+
+        Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
+        Array vec2 = {Field("abc", 3)};
+        Array vec3 = {Field("", 0)};
+        DataSet data_set = {{{vec1, vec2}, UInt8(1)},
+                            {{vec1, vec3}, UInt8(1)}, // the empty string is an ordinary element and matches itself
+                            {{Null(), vec1}, Null()},
+                            {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md b/docs/en/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md
new file mode 100644
index 0000000000..5cd3d30e36
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md
@@ -0,0 +1,66 @@
+---
+{
+    "title": "arrays_overlap",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## arrays_overlap
+
+### description
+
+#### Syntax
+
+`BOOLEAN arrays_overlap(ARRAY<T> left, ARRAY<T> right)`
+
+Checks whether the left and right arrays have at least one element in common. Returns one of the following values:
+
+```
+1    - the left and right arrays share at least one element;
+0    - the left and right arrays share no element;
+NULL - the left or right array is NULL, or any element of either array is NULL;
+```
+
+### notice
+
+`Only supported in vectorized engine`
+
+### example
+
+```
+mysql> set enable_vectorized_engine=true;
+
+mysql> select c_left,c_right,arrays_overlap(c_left,c_right) from array_test;
++--------------+-----------+-------------------------------------+
+| c_left       | c_right   | arrays_overlap(`c_left`, `c_right`) |
++--------------+-----------+-------------------------------------+
+| [1, 2, 3]    | [3, 4, 5] |                                   1 |
+| [1, 2, 3]    | [5, 6]    |                                   0 |
+| [1, 2, NULL] | [1]       |                                NULL |
+| NULL         | [1, 2]    |                                NULL |
+| [1, 2, 3]    | [1, 2]    |                                   1 |
++--------------+-----------+-------------------------------------+
+```
+
+### keywords
+
+ARRAYS_OVERLAP
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md
new file mode 100644
index 0000000000..e78aa98a3c
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md
@@ -0,0 +1,66 @@
+---
+{
+    "title": "arrays_overlap",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## arrays_overlap
+
+### description
+
+#### Syntax
+
+`BOOLEAN arrays_overlap(ARRAY<T> left, ARRAY<T> right)`
+
+判断left和right数组中是否包含公共元素。返回结果如下:
+
+```
+1    - left和right数组存在公共元素;
+0    - left和right数组不存在公共元素;
+NULL - left或者right数组为NULL;或者left和right数组中,任意元素为NULL;
+```
+
+### notice
+
+`仅支持向量化引擎中使用`
+
+### example
+
+```
+mysql> set enable_vectorized_engine=true;
+
+mysql> select c_left,c_right,arrays_overlap(c_left,c_right) from array_test;
++--------------+-----------+-------------------------------------+
+| c_left       | c_right   | arrays_overlap(`c_left`, `c_right`) |
++--------------+-----------+-------------------------------------+
+| [1, 2, 3]    | [3, 4, 5] |                                   1 |
+| [1, 2, 3]    | [5, 6]    |                                   0 |
+| [1, 2, NULL] | [1]       |                                NULL |
+| NULL         | [1, 2]    |                                NULL |
+| [1, 2, 3]    | [1, 2]    |                                   1 |
++--------------+-----------+-------------------------------------+
+```
+
+### keywords
+
+ARRAYS_OVERLAP
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index fc57c9bda2..2b916ea908 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -132,6 +132,19 @@ visible_functions = [
     [['element_at', '%element_extract%'], 'VARCHAR', ['ARRAY_VARCHAR', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['element_at', '%element_extract%'], 'STRING', ['ARRAY_STRING', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
 
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_TINYINT', 'ARRAY_TINYINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_SMALLINT', 'ARRAY_SMALLINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_INT', 'ARRAY_INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_BIGINT', 'ARRAY_BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_LARGEINT', 'ARRAY_LARGEINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DATETIME', 'ARRAY_DATETIME'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DATE', 'ARRAY_DATE'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_FLOAT', 'ARRAY_FLOAT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DOUBLE', 'ARRAY_DOUBLE'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DECIMALV2', 'ARRAY_DECIMALV2'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_VARCHAR', 'ARRAY_VARCHAR'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['arrays_overlap'], 'BOOLEAN', ['ARRAY_STRING', 'ARRAY_STRING'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+
     [['array_contains'], 'BOOLEAN', ['ARRAY_TINYINT', 'TINYINT'], '', '', '', 'vec', ''],
     [['array_contains'], 'BOOLEAN', ['ARRAY_SMALLINT', 'SMALLINT'], '', '', '', 'vec', ''],
     [['array_contains'], 'BOOLEAN', ['ARRAY_INT', 'INT'], '', '', '', 'vec', ''],
diff --git a/regression-test/data/query/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query/sql_functions/array_functions/test_array_functions.out
index 18ed9ec24d..7ba627d097 100644
--- a/regression-test/data/query/sql_functions/array_functions/test_array_functions.out
+++ b/regression-test/data/query/sql_functions/array_functions/test_array_functions.out
@@ -9,3 +9,8 @@
 2	1	\N
 3	0	0
 
+-- !select --
+1	true
+2	false
+3	\N
+
diff --git a/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy
index 2897da0ae9..3c73c11f6c 100644
--- a/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy
+++ b/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy
@@ -27,7 +27,8 @@ suite("test_array_functions", "query") {
             CREATE TABLE IF NOT EXISTS ${tableName} (
               `k1` int(11) NULL COMMENT "",
               `k2` ARRAY<int(11)> NOT NULL COMMENT "",
-              `k3` ARRAY<VARCHAR(20)> NULL COMMENT ""
+              `k3` ARRAY<VARCHAR(20)> NULL COMMENT "",
+              `k4` ARRAY<int(11)> NULL COMMENT ""
             ) ENGINE=OLAP
             DUPLICATE KEY(`k1`)
             DISTRIBUTED BY HASH(`k1`) BUCKETS 1
@@ -36,10 +37,11 @@ suite("test_array_functions", "query") {
             "storage_format" = "V2"
             )
         """
-    sql """ INSERT INTO ${tableName} VALUES(1, [1, 2, 3], ["a", "b", ""]) """
-    sql """ INSERT INTO ${tableName} VALUES(2, [4], NULL) """
-    sql """ INSERT INTO ${tableName} VALUES(3, [], []) """
+    sql """ INSERT INTO ${tableName} VALUES(1, [1, 2, 3], ["a", "b", ""], [1, 2]) """
+    sql """ INSERT INTO ${tableName} VALUES(2, [4], NULL, [5]) """
+    sql """ INSERT INTO ${tableName} VALUES(3, [], [], NULL) """
 
     qt_select "SELECT k1, size(k2), size(k3) FROM ${tableName} ORDER BY k1"
     qt_select "SELECT k1, cardinality(k2), cardinality(k3) FROM ${tableName} ORDER BY k1"
+    qt_select "SELECT k1, arrays_overlap(k2, k4) FROM ${tableName} ORDER BY k1"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org