You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/06/30 00:12:34 UTC
[doris] branch master updated: [feature-wip](array-type) add function arrays_overlap (#10233)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ec6620ae3e [feature-wip](array-type) add function arrays_overlap (#10233)
ec6620ae3e is described below
commit ec6620ae3e190774dfeb34a5041ff81ab8ba4654
Author: camby <10...@qq.com>
AuthorDate: Thu Jun 30 08:12:29 2022 +0800
[feature-wip](array-type) add function arrays_overlap (#10233)
---
be/src/vec/CMakeLists.txt | 2 +
be/src/vec/common/aggregation_common.h | 3 -
be/src/vec/common/hash_table/hash.h | 3 +
.../functions/array/function_array_register.cpp | 2 +
.../vec/functions/array/function_array_utils.cpp | 51 +++++
...n_array_register.cpp => function_array_utils.h} | 26 +--
...ay_register.cpp => function_arrays_overlap.cpp} | 17 +-
.../vec/functions/array/function_arrays_overlap.h | 248 +++++++++++++++++++++
be/src/vec/io/io_helper.h | 1 +
be/test/CMakeLists.txt | 1 +
.../vec/function/function_arrays_overlap_test.cpp | 138 ++++++++++++
.../array-functions/arrays_overlap.md | 66 ++++++
.../array-functions/arrays_overlap.md | 66 ++++++
gensrc/script/doris_builtins_functions.py | 13 ++
.../array_functions/test_array_functions.out | 5 +
.../array_functions/test_array_functions.groovy | 10 +-
16 files changed, 618 insertions(+), 34 deletions(-)
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index b32142ee80..c51f670ab6 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -134,6 +134,8 @@ set(VEC_FILES
functions/array/function_array_register.cpp
functions/array/function_array_size.cpp
functions/array/function_array_aggregation.cpp
+ functions/array/function_array_utils.cpp
+ functions/array/function_arrays_overlap.cpp
exprs/table_function/vexplode_json_array.cpp
functions/math.cpp
functions/function_bitmap.cpp
diff --git a/be/src/vec/common/aggregation_common.h b/be/src/vec/common/aggregation_common.h
index d6a4d30df5..ee3ef5ce5b 100644
--- a/be/src/vec/common/aggregation_common.h
+++ b/be/src/vec/common/aggregation_common.h
@@ -32,9 +32,6 @@
#include "vec/common/string_ref.h"
#include "vec/common/uint128.h"
-template <>
-struct DefaultHash<StringRef> : public StringRefHash {};
-
namespace doris::vectorized {
using Sizes = std::vector<size_t>;
diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h
index 3bb1d94269..fb47657809 100644
--- a/be/src/vec/common/hash_table/hash.h
+++ b/be/src/vec/common/hash_table/hash.h
@@ -91,6 +91,9 @@ struct DefaultHash<T, std::enable_if_t<std::is_arithmetic_v<T>>> {
size_t operator()(T key) const { return default_hash64<T>(key); }
};
+template <> // specialization for StringRef keys: DefaultHash reuses StringRefHash unchanged
+struct DefaultHash<StringRef> : public StringRefHash {};
+
template <typename T>
struct HashCRC32;
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp
index 833bc9fbd8..cb5a091c91 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_array_register.cpp
@@ -26,12 +26,14 @@ void register_function_array_element(SimpleFunctionFactory&);
void register_function_array_index(SimpleFunctionFactory&);
void register_function_array_size(SimpleFunctionFactory&);
void register_function_array_aggregation(SimpleFunctionFactory&);
+void register_function_arrays_overlap(SimpleFunctionFactory&);
void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_element(factory);
register_function_array_index(factory);
register_function_array_size(factory);
register_function_array_aggregation(factory);
+ register_function_arrays_overlap(factory);
}
} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_utils.cpp b/be/src/vec/functions/array/function_array_utils.cpp
new file mode 100644
index 0000000000..582bb02c33
--- /dev/null
+++ b/be/src/vec/functions/array/function_array_utils.cpp
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/array/function_array_utils.h"
+
+#include "vec/columns/column_nullable.h"
+
+namespace doris::vectorized {
+
+// Unpacks `src` into the raw pieces needed to walk an array column row by row.
+//
+// `src` is expected to be a ColumnArray, optionally wrapped in a ColumnNullable.
+// On success `data` holds the ColumnArray, its offsets and its element column;
+// the array-level / element-level null maps are filled in only when the
+// corresponding column is a ColumnNullable and are left untouched otherwise.
+//
+// Returns false when the (unwrapped) column is not a ColumnArray. In that case
+// `data` may already have been partially written and must not be used.
+bool extract_column_array_info(const IColumn& src, ColumnArrayExecutionData& data) {
+    const IColumn* array_col = &src;
+    // Peel off the outer nullable wrapper, if any. The downcast is type-checked
+    // instead of a reinterpret_cast guarded by is_nullable(): is_nullable() is a
+    // virtual predicate, not a guarantee that the dynamic type is ColumnNullable.
+    if (const auto* null_col = check_and_get_column<ColumnNullable>(&src)) {
+        data.array_nullmap_data = null_col->get_null_map_data().data();
+        array_col = null_col->get_nested_column_ptr().get();
+    }
+
+    // Whatever is left has to be the array column itself.
+    data.array_col = check_and_get_column<ColumnArray>(array_col);
+    if (data.array_col == nullptr) {
+        return false;
+    }
+
+    // Offsets delimit each row's slice of the flattened element column.
+    data.offsets_ptr = &data.array_col->get_offsets();
+    data.nested_col = &data.array_col->get_data();
+
+    // Elements may be nullable as well; expose their null map and unwrap them.
+    if (const auto* nested_null_col = check_and_get_column<ColumnNullable>(data.nested_col)) {
+        data.nested_nullmap_data = nested_null_col->get_null_map_data().data();
+        data.nested_col = nested_null_col->get_nested_column_ptr().get();
+    }
+    return true;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_utils.h
similarity index 55%
copy from be/src/vec/functions/array/function_array_register.cpp
copy to be/src/vec/functions/array/function_array_utils.h
index 833bc9fbd8..e7173489e0 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_array_utils.h
@@ -14,24 +14,22 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/registerFunctionsArray.cpp
-// and modified by Doris
+#pragma once
-#include "vec/functions/simple_function_factory.h"
+#include "vec/columns/column_array.h"
+#include "vec/data_types/data_type_array.h"
namespace doris::vectorized {
-void register_function_array_element(SimpleFunctionFactory&);
-void register_function_array_index(SimpleFunctionFactory&);
-void register_function_array_size(SimpleFunctionFactory&);
-void register_function_array_aggregation(SimpleFunctionFactory&);
+// Non-owning view over the parts of an array column, as filled in by
+// extract_column_array_info(). Every pointer defaults to nullptr.
+struct ColumnArrayExecutionData {
+    // Null map of the array column itself; stays nullptr when the source column
+    // is not wrapped in a ColumnNullable.
+    const UInt8* array_nullmap_data {nullptr};
+    // The array column after removing the optional nullable wrapper.
+    const ColumnArray* array_col {nullptr};
+    // Offsets of `array_col`.
+    const ColumnArray::Offsets* offsets_ptr {nullptr};
+    // Null map of the elements; stays nullptr when the element column is not
+    // a ColumnNullable.
+    const UInt8* nested_nullmap_data {nullptr};
+    // Element column of `array_col`, with its nullable wrapper (if any) removed.
+    const IColumn* nested_col {nullptr};
+};
-void register_function_array(SimpleFunctionFactory& factory) {
- register_function_array_element(factory);
- register_function_array_index(factory);
- register_function_array_size(factory);
- register_function_array_aggregation(factory);
-}
+bool extract_column_array_info(const IColumn& src, ColumnArrayExecutionData& data);
} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_arrays_overlap.cpp
similarity index 58%
copy from be/src/vec/functions/array/function_array_register.cpp
copy to be/src/vec/functions/array/function_arrays_overlap.cpp
index 833bc9fbd8..e4e54e9135 100644
--- a/be/src/vec/functions/array/function_array_register.cpp
+++ b/be/src/vec/functions/array/function_arrays_overlap.cpp
@@ -14,24 +14,15 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/registerFunctionsArray.cpp
-// and modified by Doris
+
+#include "vec/functions/array/function_arrays_overlap.h"
#include "vec/functions/simple_function_factory.h"
namespace doris::vectorized {
-void register_function_array_element(SimpleFunctionFactory&);
-void register_function_array_index(SimpleFunctionFactory&);
-void register_function_array_size(SimpleFunctionFactory&);
-void register_function_array_aggregation(SimpleFunctionFactory&);
-
-void register_function_array(SimpleFunctionFactory& factory) {
- register_function_array_element(factory);
- register_function_array_index(factory);
- register_function_array_size(factory);
- register_function_array_aggregation(factory);
+void register_function_arrays_overlap(SimpleFunctionFactory& factory) { // registers FunctionArraysOverlap with the given factory
+ factory.register_function<FunctionArraysOverlap>();
}
} // namespace doris::vectorized
diff --git a/be/src/vec/functions/array/function_arrays_overlap.h b/be/src/vec/functions/array/function_arrays_overlap.h
new file mode 100644
index 0000000000..b1e10449fa
--- /dev/null
+++ b/be/src/vec/functions/array/function_arrays_overlap.h
@@ -0,0 +1,248 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <string_view>
+
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_string.h"
+#include "vec/common/hash_table/hash_set.h"
+#include "vec/common/string_ref.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/array/function_array_utils.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+// Membership helper used by arrays_overlap for element columns of type T that
+// expose their values through get_data(). One side of the comparison is loaded
+// with insert_array(), the other side is probed with find_any().
+template <typename T>
+struct OverlapSetImpl {
+    using ElementNativeType = typename NativeType<typename T::value_type>::Type;
+    using Set = HashSetWithStackMemory<ElementNativeType, DefaultHash<ElementNativeType>, 4>;
+    Set set;
+
+    // Adds the elements at positions [start, start + size) of `column` to the set.
+    void insert_array(const IColumn* column, size_t start, size_t size) {
+        const auto& values = assert_cast<const T&>(*column).get_data();
+        for (size_t pos = start, end = start + size; pos < end; ++pos) {
+            set.insert(values[pos]);
+        }
+    }
+
+    // Returns true as soon as one element at positions [start, start + size) of
+    // `column` is found in the set, false if none is.
+    bool find_any(const IColumn* column, size_t start, size_t size) {
+        const auto& values = assert_cast<const T&>(*column).get_data();
+        size_t pos = start;
+        const size_t end = start + size;
+        while (pos < end) {
+            if (set.find(values[pos])) {
+                return true;
+            }
+            ++pos;
+        }
+        return false;
+    }
+};
+
+// String flavour of the membership helper: elements are read through
+// IColumn::get_data_at() and stored in the set as StringRef values.
+template <>
+struct OverlapSetImpl<ColumnString> {
+    using Set = HashSetWithStackMemory<StringRef, DefaultHash<StringRef>, 4>;
+    Set set;
+
+    // Adds the elements at positions [start, start + size) of `column` to the set.
+    void insert_array(const IColumn* column, size_t start, size_t size) {
+        for (size_t pos = start, end = start + size; pos < end; ++pos) {
+            set.insert(column->get_data_at(pos));
+        }
+    }
+
+    // Returns true as soon as one element at positions [start, start + size) of
+    // `column` is found in the set, false if none is.
+    bool find_any(const IColumn* column, size_t start, size_t size) {
+        size_t pos = start;
+        const size_t end = start + size;
+        while (pos < end) {
+            if (set.find(column->get_data_at(pos))) {
+                return true;
+            }
+            ++pos;
+        }
+        return false;
+    }
+};
+
+// Vectorized implementation of arrays_overlap(left, right).
+//
+// Per row the result is:
+//   NULL - either array is NULL, or either array contains a NULL element;
+//   0    - neither of the above and the arrays share no element (this includes
+//          the case where one of them is empty);
+//   1    - neither of the above and the arrays share at least one element.
+class FunctionArraysOverlap : public IFunction {
+public:
+    static constexpr auto name = "arrays_overlap";
+    static FunctionPtr create() { return std::make_shared<FunctionArraysOverlap>(); }
+
+    /// Get function name.
+    String get_name() const override { return name; }
+
+    // NULL inputs are handled explicitly in execute_impl().
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    bool is_variadic() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    // The result is always Nullable(UInt8). Argument types are only validated
+    // with DCHECK: both must be arrays of the same type once Nullable is removed.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        auto left_data_type = remove_nullable(arguments[0]);
+        auto right_data_type = remove_nullable(arguments[1]);
+        DCHECK(is_array(left_data_type)) << arguments[0]->get_name();
+        DCHECK(is_array(right_data_type)) << arguments[1]->get_name();
+        DCHECK(left_data_type->equals(*right_data_type))
+                << "data type " << arguments[0]->get_name() << " not equal with "
+                << arguments[1]->get_name();
+        return make_nullable(std::make_shared<DataTypeUInt8>());
+    }
+
+    // Computes the overlap flag for every row and stores a Nullable(UInt8) column
+    // at position `result` of `block`.
+    //
+    // Returns a RuntimeError, leaving `block` untouched, when an argument is not
+    // an array column or the element column type is not handled below.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto left_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        auto right_column =
+                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        ColumnArrayExecutionData left;
+        ColumnArrayExecutionData right;
+
+        // The error is formatted only when it is actually returned, so the
+        // success path does not pay for building a message it never uses.
+        const auto unsupported_types = [&]() {
+            return Status::RuntimeError(fmt::format(
+                    "execute failed, unsupported types for function {}({}, {})", get_name(),
+                    block.get_by_position(arguments[0]).type->get_name(),
+                    block.get_by_position(arguments[1]).type->get_name()));
+        };
+
+        // extract array column
+        if (!extract_column_array_info(*left_column, left) ||
+            !extract_column_array_info(*right_column, right)) {
+            return unsupported_types();
+        }
+
+        // prepare return column
+        auto dst_nested_col = ColumnVector<UInt8>::create(input_rows_count, 0);
+        auto dst_null_map = ColumnVector<UInt8>::create(input_rows_count, 0);
+        UInt8* null_map = dst_null_map->get_data().data();
+        UInt8* dst = dst_nested_col->get_data().data();
+
+        // any array is null or any elements in array is null, return null
+        RETURN_IF_ERROR(_execute_nullable(left, null_map));
+        RETURN_IF_ERROR(_execute_nullable(right, null_map));
+
+        // execute overlap check, dispatching on the left element column
+        const IColumn& elements = *left.nested_col;
+        Status ret = Status::OK();
+        bool supported = true;
+        if (elements.is_column_string()) {
+            ret = _execute_internal<ColumnString>(left, right, null_map, dst);
+        } else if (elements.is_date_type()) {
+            ret = _execute_internal<ColumnDate>(left, right, null_map, dst);
+        } else if (elements.is_datetime_type()) {
+            ret = _execute_internal<ColumnDateTime>(left, right, null_map, dst);
+        } else if (elements.is_numeric()) {
+            if (check_column<ColumnUInt8>(elements)) {
+                ret = _execute_internal<ColumnUInt8>(left, right, null_map, dst);
+            } else if (check_column<ColumnInt8>(elements)) {
+                ret = _execute_internal<ColumnInt8>(left, right, null_map, dst);
+            } else if (check_column<ColumnInt16>(elements)) {
+                ret = _execute_internal<ColumnInt16>(left, right, null_map, dst);
+            } else if (check_column<ColumnInt32>(elements)) {
+                ret = _execute_internal<ColumnInt32>(left, right, null_map, dst);
+            } else if (check_column<ColumnInt64>(elements)) {
+                ret = _execute_internal<ColumnInt64>(left, right, null_map, dst);
+            } else if (check_column<ColumnInt128>(elements)) {
+                ret = _execute_internal<ColumnInt128>(left, right, null_map, dst);
+            } else if (check_column<ColumnFloat32>(elements)) {
+                ret = _execute_internal<ColumnFloat32>(left, right, null_map, dst);
+            } else if (check_column<ColumnFloat64>(elements)) {
+                ret = _execute_internal<ColumnFloat64>(left, right, null_map, dst);
+            } else {
+                supported = false;
+            }
+        } else if (elements.is_column_decimal() && check_column<ColumnDecimal128>(elements)) {
+            ret = _execute_internal<ColumnDecimal128>(left, right, null_map, dst);
+        } else {
+            supported = false;
+        }
+
+        if (!supported) {
+            return unsupported_types();
+        }
+        if (!ret.ok()) {
+            return ret;
+        }
+
+        block.replace_by_position(result, ColumnNullable::create(std::move(dst_nested_col),
+                                                                 std::move(dst_null_map)));
+        return Status::OK();
+    }
+
+private:
+    // Marks in `dst_nullmap_data` every row of `data` whose array is NULL or
+    // contains a NULL element. Rows already marked are skipped, so the function
+    // can be applied to the left and the right argument in turn.
+    Status _execute_nullable(const ColumnArrayExecutionData& data, UInt8* dst_nullmap_data) {
+        const auto& offsets = *data.offsets_ptr;
+        const auto row_count = static_cast<ssize_t>(offsets.size());
+        for (ssize_t row = 0; row < row_count; ++row) {
+            if (dst_nullmap_data[row]) {
+                continue;
+            }
+
+            if (data.array_nullmap_data && data.array_nullmap_data[row]) {
+                dst_nullmap_data[row] = 1;
+                continue;
+            }
+
+            // any element inside array is NULL, return NULL
+            if (data.nested_nullmap_data) {
+                // For row 0 this reads offsets[-1].
+                // NOTE(review): assumes the offsets container permits index -1
+                // and yields 0 there - confirm.
+                const ssize_t start = offsets[row - 1];
+                const ssize_t end = offsets[row];
+                for (ssize_t i = start; i < end; ++i) {
+                    if (data.nested_nullmap_data[i]) {
+                        dst_nullmap_data[row] = 1;
+                        break;
+                    }
+                }
+            }
+        }
+        return Status::OK();
+    }
+
+    // Writes the overlap flag of every non-NULL row into `dst_data`; rows marked
+    // in `dst_nullmap_data` are left untouched. T is the element column type;
+    // both sides are read through OverlapSetImpl<T>.
+    template <typename T>
+    Status _execute_internal(const ColumnArrayExecutionData& left_data,
+                             const ColumnArrayExecutionData& right_data,
+                             const UInt8* dst_nullmap_data, UInt8* dst_data) {
+        using ExecutorImpl = OverlapSetImpl<T>;
+        const auto& left_offsets = *left_data.offsets_ptr;
+        const auto& right_offsets = *right_data.offsets_ptr;
+        const auto row_count = static_cast<ssize_t>(left_offsets.size());
+        for (ssize_t row = 0; row < row_count; ++row) {
+            if (dst_nullmap_data[row]) {
+                continue;
+            }
+
+            // See _execute_nullable() for the offsets[row - 1] convention.
+            const ssize_t left_start = left_offsets[row - 1];
+            const ssize_t left_size = left_offsets[row] - left_start;
+            const ssize_t right_start = right_offsets[row - 1];
+            const ssize_t right_size = right_offsets[row] - right_start;
+            if (left_size == 0 || right_size == 0) {
+                dst_data[row] = 0;
+                continue;
+            }
+
+            // Build the set from the shorter array and probe it with the longer one.
+            ExecutorImpl impl;
+            if (right_size < left_size) {
+                impl.insert_array(right_data.nested_col, right_start, right_size);
+                dst_data[row] = impl.find_any(left_data.nested_col, left_start, left_size);
+            } else {
+                impl.insert_array(left_data.nested_col, left_start, left_size);
+                dst_data[row] = impl.find_any(right_data.nested_col, right_start, right_size);
+            }
+        }
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
index 8425c0ec4d..01c89ed155 100644
--- a/be/src/vec/io/io_helper.h
+++ b/be/src/vec/io/io_helper.h
@@ -26,6 +26,7 @@
#include "util/string_parser.hpp"
#include "vec/common/arena.h"
#include "vec/common/exception.h"
+#include "vec/common/string_buffer.hpp"
#include "vec/common/string_ref.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt
index b245a343d2..879b6f7b24 100644
--- a/be/test/CMakeLists.txt
+++ b/be/test/CMakeLists.txt
@@ -341,6 +341,7 @@ set(VEC_TEST_FILES
vec/function/function_array_element_test.cpp
vec/function/function_array_index_test.cpp
vec/function/function_array_size_test.cpp
+ vec/function/function_arrays_overlap_test.cpp
vec/function/function_bitmap_test.cpp
vec/function/function_comparison_test.cpp
vec/function/function_hash_test.cpp
diff --git a/be/test/vec/function/function_arrays_overlap_test.cpp b/be/test/vec/function/function_arrays_overlap_test.cpp
new file mode 100644
index 0000000000..053205e403
--- /dev/null
+++ b/be/test/vec/function/function_arrays_overlap_test.cpp
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include <string>
+
+#include "function_test_util.h"
+#include "runtime/tuple_row.h"
+#include "util/url_coding.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+// Exercises arrays_overlap for each supported element type: an overlapping pair,
+// a NULL array and an empty array. The Int32 case additionally checks that NULL
+// and empty arrays behave the same whether they are the left or the right
+// argument.
+TEST(function_arrays_overlap_test, arrays_overlap) {
+    std::string func_name = "arrays_overlap";
+    Array empty_arr;
+
+    // arrays_overlap(Array<Int32>, Array<Int32>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32, TypeIndex::Array,
+                                    TypeIndex::Int32};
+
+        Array vec1 = {Int32(1), Int32(2), Int32(3)};
+        Array vec2 = {Int32(3)};
+        Array vec3 = {Int32(4), Int32(5)};
+        DataSet data_set = {{{vec1, vec2}, UInt8(1)},
+                            {{vec1, vec3}, UInt8(0)},
+                            {{Null(), vec1}, Null()},
+                            // NULL as the right argument must also yield NULL
+                            {{vec1, Null()}, Null()},
+                            {{empty_arr, vec1}, UInt8(0)},
+                            // an empty right argument never overlaps
+                            {{vec1, empty_arr}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<Int128>, Array<Int128>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int128, TypeIndex::Array,
+                                    TypeIndex::Int128};
+
+        Array vec1 = {Int128(11111111111LL), Int128(22222LL), Int128(333LL)};
+        Array vec2 = {Int128(11111111111LL)};
+        DataSet data_set = {
+                {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<Float64>, Array<Float64>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Float64, TypeIndex::Array,
+                                    TypeIndex::Float64};
+
+        Array vec1 = {double(1.2345), double(2.222), double(3.0)};
+        Array vec2 = {double(1.2345)};
+        DataSet data_set = {
+                {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<Date>, Array<Date>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Array,
+                                    TypeIndex::Date};
+
+        Array vec1 = {str_to_date_time("2022-01-02", false), str_to_date_time("", false),
+                      str_to_date_time("2022-07-08", false)};
+        Array vec2 = {str_to_date_time("2022-01-02", false)};
+        DataSet data_set = {
+                {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<DateTime>, Array<DateTime>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::Array,
+                                    TypeIndex::DateTime};
+
+        Array vec1 = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time(""),
+                      str_to_date_time("2022-07-08 00:00:00")};
+        Array vec2 = {str_to_date_time("2022-01-02 00:00:00")};
+        Array vec3 = {str_to_date_time("")};
+        DataSet data_set = {{{vec1, vec2}, UInt8(1)},
+                            {{vec1, vec3}, UInt8(1)},
+                            {{Null(), vec1}, Null()},
+                            {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<Decimal128>, Array<Decimal128>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128, TypeIndex::Array,
+                                    TypeIndex::Decimal128};
+
+        Array vec1 = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67),
+                      ut_type::DECIMALFIELD(0.0)};
+        Array vec2 = {ut_type::DECIMALFIELD(17014116.67)};
+        DataSet data_set = {
+                {{vec1, vec2}, UInt8(1)}, {{Null(), vec1}, Null()}, {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+
+    // arrays_overlap(Array<String>, Array<String>)
+    {
+        InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Array,
+                                    TypeIndex::String};
+
+        Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)};
+        Array vec2 = {Field("abc", 3)};
+        Array vec3 = {Field("", 0)};
+        DataSet data_set = {{{vec1, vec2}, UInt8(1)},
+                            {{vec1, vec3}, UInt8(1)},
+                            {{Null(), vec1}, Null()},
+                            {{empty_arr, vec1}, UInt8(0)}};
+
+        check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md b/docs/en/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md
new file mode 100644
index 0000000000..5cd3d30e36
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md
@@ -0,0 +1,66 @@
+---
+{
+ "title": "arrays_overlap",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## arrays_overlap
+
+### description
+
+#### Syntax
+
+`BOOLEAN arrays_overlap(ARRAY<T> left, ARRAY<T> right)`
+
+Checks whether the left and right arrays have at least one element in common. Returns one of the following values:
+
+```
+1 - if the left and right arrays have at least one common element;
+0 - if the left and right arrays have no common element;
+NULL - if the left or right array is NULL, or if any element inside either array is NULL;
+```
+
+### notice
+
+`Only supported in vectorized engine`
+
+### example
+
+```
+mysql> set enable_vectorized_engine=true;
+
+mysql> select c_left,c_right,arrays_overlap(c_left,c_right) from array_test;
++--------------+-----------+-------------------------------------+
+| c_left | c_right | arrays_overlap(`c_left`, `c_right`) |
++--------------+-----------+-------------------------------------+
+| [1, 2, 3] | [3, 4, 5] | 1 |
+| [1, 2, 3] | [5, 6] | 0 |
+| [1, 2, NULL] | [1] | NULL |
+| NULL | [1, 2] | NULL |
+| [1, 2, 3] | [1, 2] | 1 |
++--------------+-----------+-------------------------------------+
+```
+
+### keywords
+
+ARRAYS_OVERLAP
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md
new file mode 100644
index 0000000000..e78aa98a3c
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/arrays_overlap.md
@@ -0,0 +1,66 @@
+---
+{
+ "title": "arrays_overlap",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## arrays_overlap
+
+### description
+
+#### Syntax
+
+`BOOLEAN arrays_overlap(ARRAY<T> left, ARRAY<T> right)`
+
+判断left和right数组中是否包含公共元素。返回结果如下:
+
+```
+1 - left和right数组存在公共元素;
+0 - left和right数组不存在公共元素;
+NULL - left或者right数组为NULL;或者left和right数组中,任意元素为NULL;
+```
+
+### notice
+
+`仅支持向量化引擎中使用`
+
+### example
+
+```
+mysql> set enable_vectorized_engine=true;
+
+mysql> select c_left,c_right,arrays_overlap(c_left,c_right) from array_test;
++--------------+-----------+-------------------------------------+
+| c_left | c_right | arrays_overlap(`c_left`, `c_right`) |
++--------------+-----------+-------------------------------------+
+| [1, 2, 3] | [3, 4, 5] | 1 |
+| [1, 2, 3] | [5, 6] | 0 |
+| [1, 2, NULL] | [1] | NULL |
+| NULL | [1, 2] | NULL |
+| [1, 2, 3] | [1, 2] | 1 |
++--------------+-----------+-------------------------------------+
+```
+
+### keywords
+
+ARRAYS_OVERLAP
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index fc57c9bda2..2b916ea908 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -132,6 +132,19 @@ visible_functions = [
[['element_at', '%element_extract%'], 'VARCHAR', ['ARRAY_VARCHAR', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
[['element_at', '%element_extract%'], 'STRING', ['ARRAY_STRING', 'BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_TINYINT', 'ARRAY_TINYINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_SMALLINT', 'ARRAY_SMALLINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_INT', 'ARRAY_INT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_BIGINT', 'ARRAY_BIGINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_LARGEINT', 'ARRAY_LARGEINT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DATETIME', 'ARRAY_DATETIME'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DATE', 'ARRAY_DATE'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_FLOAT', 'ARRAY_FLOAT'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DOUBLE', 'ARRAY_DOUBLE'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_DECIMALV2', 'ARRAY_DECIMALV2'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_VARCHAR', 'ARRAY_VARCHAR'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['arrays_overlap'], 'BOOLEAN', ['ARRAY_STRING', 'ARRAY_STRING'], '', '', '', 'vec', 'ALWAYS_NULLABLE'],
+
[['array_contains'], 'BOOLEAN', ['ARRAY_TINYINT', 'TINYINT'], '', '', '', 'vec', ''],
[['array_contains'], 'BOOLEAN', ['ARRAY_SMALLINT', 'SMALLINT'], '', '', '', 'vec', ''],
[['array_contains'], 'BOOLEAN', ['ARRAY_INT', 'INT'], '', '', '', 'vec', ''],
diff --git a/regression-test/data/query/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query/sql_functions/array_functions/test_array_functions.out
index 18ed9ec24d..7ba627d097 100644
--- a/regression-test/data/query/sql_functions/array_functions/test_array_functions.out
+++ b/regression-test/data/query/sql_functions/array_functions/test_array_functions.out
@@ -9,3 +9,8 @@
2 1 \N
3 0 0
+-- !select --
+1 true
+2 false
+3 \N
+
diff --git a/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy
index 2897da0ae9..3c73c11f6c 100644
--- a/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy
+++ b/regression-test/suites/query/sql_functions/array_functions/test_array_functions.groovy
@@ -27,7 +27,8 @@ suite("test_array_functions", "query") {
CREATE TABLE IF NOT EXISTS ${tableName} (
`k1` int(11) NULL COMMENT "",
`k2` ARRAY<int(11)> NOT NULL COMMENT "",
- `k3` ARRAY<VARCHAR(20)> NULL COMMENT ""
+ `k3` ARRAY<VARCHAR(20)> NULL COMMENT "",
+ `k4` ARRAY<int(11)> NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`k1`)
DISTRIBUTED BY HASH(`k1`) BUCKETS 1
@@ -36,10 +37,11 @@ suite("test_array_functions", "query") {
"storage_format" = "V2"
)
"""
- sql """ INSERT INTO ${tableName} VALUES(1, [1, 2, 3], ["a", "b", ""]) """
- sql """ INSERT INTO ${tableName} VALUES(2, [4], NULL) """
- sql """ INSERT INTO ${tableName} VALUES(3, [], []) """
+ sql """ INSERT INTO ${tableName} VALUES(1, [1, 2, 3], ["a", "b", ""], [1, 2]) """
+ sql """ INSERT INTO ${tableName} VALUES(2, [4], NULL, [5]) """
+ sql """ INSERT INTO ${tableName} VALUES(3, [], [], NULL) """
qt_select "SELECT k1, size(k2), size(k3) FROM ${tableName} ORDER BY k1"
qt_select "SELECT k1, cardinality(k2), cardinality(k3) FROM ${tableName} ORDER BY k1"
+ qt_select "SELECT k1, arrays_overlap(k2, k4) FROM ${tableName} ORDER BY k1"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org