You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/17 00:48:46 UTC
[incubator-doris] branch master updated: [Vectorized][Function] add orthogonal bitmap agg functions (#10126)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 44e979e43b [Vectorized][Function] add orthogonal bitmap agg functions (#10126)
44e979e43b is described below
commit 44e979e43b0ebdb961ce64782dfdc17613f43ed5
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Fri Jun 17 08:48:41 2022 +0800
[Vectorized][Function] add orthogonal bitmap agg functions (#10126)
* [Vectorized][Function] add orthogonal bitmap agg functions
save some file about orthogonal bitmap function
add some file to rebase
update functions file
* refactor union_count function
refactor orthogonal union count functions
* remove bool is_variadic
---
be/src/exprs/bitmap_function.cpp | 256 ++-------------------
be/src/runtime/string_value.h | 1 +
be/src/util/bitmap_intersect.h | 245 ++++++++++++++++++++
be/src/vec/CMakeLists.txt | 1 +
.../aggregate_function_orthogonal_bitmap.cpp | 99 ++++++++
.../aggregate_function_orthogonal_bitmap.h | 247 ++++++++++++++++++++
.../aggregate_function_simple_factory.cpp | 2 +
be/test/exprs/bitmap_function_test.cpp | 7 +-
docs/.vuepress/sidebar/en/docs.js | 1 +
docs/.vuepress/sidebar/zh-CN/docs.js | 1 +
.../bitmap-functions/intersect_count.md | 57 +++++
.../bitmap-functions/intersect_count.md | 56 +++++
.../apache/doris/analysis/FunctionCallExpr.java | 11 +-
.../apache/doris/catalog/AggregateFunction.java | 7 +-
.../java/org/apache/doris/catalog/FunctionSet.java | 21 ++
15 files changed, 772 insertions(+), 240 deletions(-)
diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 5e38ab8f79..e45f7244b9 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -20,141 +20,12 @@
#include "exprs/anyval_util.h"
#include "gutil/strings/numbers.h"
#include "gutil/strings/split.h"
+#include "util/bitmap_intersect.h"
#include "util/bitmap_value.h"
#include "util/string_parser.hpp"
namespace doris {
-namespace detail {
-
-const int DATETIME_PACKED_TIME_BYTE_SIZE = 8;
-const int DATETIME_TYPE_BYTE_SIZE = 4;
-
-const int DECIMAL_BYTE_SIZE = 16;
-
-// get_val start
-template <typename ValType, typename T>
-T get_val(const ValType& x) {
- DCHECK(!x.is_null);
- return x.val;
-}
-
-template <>
-StringValue get_val(const StringVal& x) {
- DCHECK(!x.is_null);
- return StringValue::from_string_val(x);
-}
-
-template <>
-DateTimeValue get_val(const DateTimeVal& x) {
- return DateTimeValue::from_datetime_val(x);
-}
-
-template <>
-DecimalV2Value get_val(const DecimalV2Val& x) {
- return DecimalV2Value::from_decimal_val(x);
-}
-// get_val end
-
-// serialize_size start
-template <typename T>
-int32_t serialize_size(const T& v) {
- return sizeof(T);
-}
-
-template <>
-int32_t serialize_size(const DateTimeValue& v) {
- return DATETIME_PACKED_TIME_BYTE_SIZE + DATETIME_TYPE_BYTE_SIZE;
-}
-
-template <>
-int32_t serialize_size(const DecimalV2Value& v) {
- return DECIMAL_BYTE_SIZE;
-}
-
-template <>
-int32_t serialize_size(const StringValue& v) {
- return v.len + 4;
-}
-// serialize_size end
-
-// write_to start
-template <typename T>
-char* write_to(const T& v, char* dest) {
- size_t type_size = sizeof(T);
- memcpy(dest, &v, type_size);
- dest += type_size;
- return dest;
-}
-
-template <>
-char* write_to(const DateTimeValue& v, char* dest) {
- DateTimeVal value;
- v.to_datetime_val(&value);
- *(int64_t*)dest = value.packed_time;
- dest += DATETIME_PACKED_TIME_BYTE_SIZE;
- *(int*)dest = value.type;
- dest += DATETIME_TYPE_BYTE_SIZE;
- return dest;
-}
-
-template <>
-char* write_to(const DecimalV2Value& v, char* dest) {
- __int128 value = v.value();
- memcpy(dest, &value, DECIMAL_BYTE_SIZE);
- dest += DECIMAL_BYTE_SIZE;
- return dest;
-}
-
-template <>
-char* write_to(const StringValue& v, char* dest) {
- *(int32_t*)dest = v.len;
- dest += 4;
- memcpy(dest, v.ptr, v.len);
- dest += v.len;
- return dest;
-}
-// write_to end
-
-// read_from start
-template <typename T>
-void read_from(const char** src, T* result) {
- size_t type_size = sizeof(T);
- memcpy(result, *src, type_size);
- *src += type_size;
-}
-
-template <>
-void read_from(const char** src, DateTimeValue* result) {
- DateTimeVal value;
- value.is_null = false;
- value.packed_time = *(int64_t*)(*src);
- *src += DATETIME_PACKED_TIME_BYTE_SIZE;
- value.type = *(int*)(*src);
- *src += DATETIME_TYPE_BYTE_SIZE;
- *result = DateTimeValue::from_datetime_val(value);
- ;
-}
-
-template <>
-void read_from(const char** src, DecimalV2Value* result) {
- __int128 v = 0;
- memcpy(&v, *src, DECIMAL_BYTE_SIZE);
- *src += DECIMAL_BYTE_SIZE;
- *result = DecimalV2Value(v);
-}
-
-template <>
-void read_from(const char** src, StringValue* result) {
- int32_t length = *(int32_t*)(*src);
- *src += 4;
- *result = StringValue((char*)*src, length);
- *src += length;
-}
-// read_from end
-
-} // namespace detail
-
static StringVal serialize(FunctionContext* ctx, BitmapValue* value) {
if (!value) {
BitmapValue empty_bitmap;
@@ -168,98 +39,6 @@ static StringVal serialize(FunctionContext* ctx, BitmapValue* value) {
}
}
-// Calculate the intersection of two or more bitmaps
-// Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...)
-// Example: intersect_count(user_id, event, 'A', 'B', 'C'), meaning find the intersect count of user_id in all A/B/C 3 bitmaps
-// Todo(kks) Use Array type instead of variable arguments
-template <typename T>
-struct BitmapIntersect {
-public:
- BitmapIntersect() {}
-
- explicit BitmapIntersect(const char* src) { deserialize(src); }
-
- void add_key(const T key) {
- BitmapValue empty_bitmap;
- _bitmaps[key] = empty_bitmap;
- }
-
- void update(const T& key, const BitmapValue& bitmap) {
- if (_bitmaps.find(key) != _bitmaps.end()) {
- _bitmaps[key] |= bitmap;
- }
- }
-
- void merge(const BitmapIntersect& other) {
- for (auto& kv : other._bitmaps) {
- if (_bitmaps.find(kv.first) != _bitmaps.end()) {
- _bitmaps[kv.first] |= kv.second;
- } else {
- _bitmaps[kv.first] = kv.second;
- }
- }
- }
-
- // intersection
- BitmapValue intersect() const {
- BitmapValue result;
- auto it = _bitmaps.begin();
- result |= it->second;
- it++;
- for (; it != _bitmaps.end(); it++) {
- result &= it->second;
- }
- return result;
- }
-
- // calculate the intersection for _bitmaps's bitmap values
- int64_t intersect_count() const {
- if (_bitmaps.empty()) {
- return 0;
- }
- return intersect().cardinality();
- }
-
- // the serialize size
- size_t size() {
- size_t size = 4;
- for (auto& kv : _bitmaps) {
- size += detail::serialize_size(kv.first);
- ;
- size += kv.second.getSizeInBytes();
- }
- return size;
- }
-
- //must call size() first
- void serialize(char* dest) {
- char* writer = dest;
- *(int32_t*)writer = _bitmaps.size();
- writer += 4;
- for (auto& kv : _bitmaps) {
- writer = detail::write_to(kv.first, writer);
- kv.second.write(writer);
- writer += kv.second.getSizeInBytes();
- }
- }
-
- void deserialize(const char* src) {
- const char* reader = src;
- int32_t bitmaps_size = *(int32_t*)reader;
- reader += 4;
- for (int32_t i = 0; i < bitmaps_size; i++) {
- T key;
- detail::read_from(&reader, &key);
- BitmapValue bitmap(reader);
- reader += bitmap.getSizeInBytes();
- _bitmaps[key] = bitmap;
- }
- }
-
-private:
- std::map<T, BitmapValue> _bitmaps;
-};
-
void BitmapFunctions::init() {}
void BitmapFunctions::bitmap_init(FunctionContext* ctx, StringVal* dst) {
@@ -403,7 +182,7 @@ StringVal BitmapFunctions::bitmap_serialize(FunctionContext* ctx, const StringVa
return result;
}
-// This is a init function for intersect_count not for bitmap_intersect.
+// This is an init function for intersect_count, not for bitmap_intersect and not for orthogonal_bitmap_intersect(bitmap,t,t).
template <typename T, typename ValType>
void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) {
dst->is_null = false;
@@ -414,12 +193,14 @@ void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst
for (int i = 2; i < ctx->get_num_constant_args(); ++i) {
DCHECK(ctx->is_arg_constant(i));
ValType* arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(i));
- intersect->add_key(detail::get_val<ValType, T>(*arg));
+ intersect->add_key(detail::Helper::get_val<ValType, T>(*arg));
}
dst->ptr = (uint8_t*)intersect;
}
+// This is an update function for intersect_count/ORTHOGONAL_BITMAP_INTERSECT_COUNT/ORTHOGONAL_BITMAP_INTERSECT(bitmap,t,t),
+// not for bitmap_intersect(Bitmap).
template <typename T, typename ValType>
void BitmapFunctions::bitmap_intersect_update(FunctionContext* ctx, const StringVal& src,
const ValType& key, int num_key, const ValType* keys,
@@ -427,13 +208,14 @@ void BitmapFunctions::bitmap_intersect_update(FunctionContext* ctx, const String
auto* dst_bitmap = reinterpret_cast<BitmapIntersect<T>*>(dst->ptr);
// zero size means the src input is a agg object
if (src.len == 0) {
- dst_bitmap->update(detail::get_val<ValType, T>(key),
+ dst_bitmap->update(detail::Helper::get_val<ValType, T>(key),
*reinterpret_cast<BitmapValue*>(src.ptr));
} else {
- dst_bitmap->update(detail::get_val<ValType, T>(key), BitmapValue((char*)src.ptr));
+ dst_bitmap->update(detail::Helper::get_val<ValType, T>(key), BitmapValue((char*)src.ptr));
}
}
+//only for intersect_count(bitmap,t,t)
template <typename T>
void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, const StringVal& src,
const StringVal* dst) {
@@ -441,6 +223,7 @@ void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, const StringV
dst_bitmap->merge(BitmapIntersect<T>((char*)src.ptr));
}
+//only for intersect_count(bitmap,t,t)
template <typename T>
StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, const StringVal& src) {
auto* src_bitmap = reinterpret_cast<BitmapIntersect<T>*>(src.ptr);
@@ -450,6 +233,7 @@ StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, cons
return result;
}
+//only for intersect_count(bitmap,t,t)
template <typename T>
BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src) {
auto* src_bitmap = reinterpret_cast<BitmapIntersect<T>*>(src.ptr);
@@ -928,13 +712,15 @@ StringVal BitmapFunctions::bitmap_subset_limit(FunctionContext* ctx, const Strin
return serialize(ctx, &ret_bitmap);
}
-
+// Init function for ORTHOGONAL_BITMAP_UNION_COUNT(bitmap);
+// its update function is bitmap_union().
void BitmapFunctions::orthogonal_bitmap_union_count_init(FunctionContext* ctx, StringVal* dst) {
dst->is_null = false;
dst->len = sizeof(BitmapValue);
dst->ptr = (uint8_t*)new BitmapValue();
}
+// serialize for ORTHOGONAL_BITMAP_UNION_COUNT(bitmap)
StringVal BitmapFunctions::orthogonal_bitmap_count_serialize(FunctionContext* ctx,
const StringVal& src) {
if (src.is_null) {
@@ -950,7 +736,7 @@ StringVal BitmapFunctions::orthogonal_bitmap_count_serialize(FunctionContext* ct
return result;
}
-// This is a init function for bitmap_intersect.
+// This is an init function for orthogonal_bitmap_intersect(bitmap,t,t).
template <typename T, typename ValType>
void BitmapFunctions::orthogonal_bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) {
// constant args start from index 2
@@ -961,7 +747,7 @@ void BitmapFunctions::orthogonal_bitmap_intersect_init(FunctionContext* ctx, Str
for (int i = 2; i < ctx->get_num_constant_args(); ++i) {
ValType* arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(i));
- intersect->add_key(detail::get_val<ValType, T>(*arg));
+ intersect->add_key(detail::Helper::get_val<ValType, T>(*arg));
}
dst->ptr = (uint8_t*)intersect;
@@ -972,7 +758,7 @@ void BitmapFunctions::orthogonal_bitmap_intersect_init(FunctionContext* ctx, Str
}
}
-// This is a init function for intersect_count.
+// This is an init function for orthogonal_bitmap_intersect_count(bitmap,t,t).
template <typename T, typename ValType>
void BitmapFunctions::orthogonal_bitmap_intersect_count_init(FunctionContext* ctx, StringVal* dst) {
if (ctx->get_num_constant_args() > 1) {
@@ -983,7 +769,7 @@ void BitmapFunctions::orthogonal_bitmap_intersect_count_init(FunctionContext* ct
// constant args start from index 2
for (int i = 2; i < ctx->get_num_constant_args(); ++i) {
ValType* arg = reinterpret_cast<ValType*>(ctx->get_constant_arg(i));
- intersect->add_key(detail::get_val<ValType, T>(*arg));
+ intersect->add_key(detail::Helper::get_val<ValType, T>(*arg));
}
dst->ptr = (uint8_t*)intersect;
@@ -995,6 +781,9 @@ void BitmapFunctions::orthogonal_bitmap_intersect_count_init(FunctionContext* ct
}
}
+// This is a serialize function for orthogonal_bitmap_intersect(bitmap,t,t).
+// Its merge is the simple bitmap_union() function (LINE 80);
+// its finalize is the bitmap_serialize() function (LINE 173).
template <typename T>
StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize(FunctionContext* ctx,
const StringVal& src) {
@@ -1014,6 +803,8 @@ BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize(FunctionContext*
return result;
}
+// This is the merge function for orthogonal_bitmap_intersect_count(bitmap,t,t)
+// and also the merge function for ORTHOGONAL_BITMAP_UNION_COUNT(bitmap).
void BitmapFunctions::orthogonal_bitmap_count_merge(FunctionContext* context, const StringVal& src,
StringVal* dst) {
if (dst->len != sizeof(int64_t)) {
@@ -1027,6 +818,8 @@ void BitmapFunctions::orthogonal_bitmap_count_merge(FunctionContext* context, co
*(int64_t*)dst->ptr += *(int64_t*)src.ptr;
}
+// This is the finalize function for orthogonal_bitmap_intersect_count(bitmap,t,t)
+// and also the finalize function for ORTHOGONAL_BITMAP_UNION_COUNT(bitmap).
BigIntVal BitmapFunctions::orthogonal_bitmap_count_finalize(FunctionContext* context,
const StringVal& src) {
auto* pval = reinterpret_cast<int64_t*>(src.ptr);
@@ -1035,6 +828,7 @@ BigIntVal BitmapFunctions::orthogonal_bitmap_count_finalize(FunctionContext* con
return result;
}
+// This is a serialize function for orthogonal_bitmap_intersect_count(bitmap,t,t).
template <typename T>
StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize(FunctionContext* ctx,
const StringVal& src) {
diff --git a/be/src/runtime/string_value.h b/be/src/runtime/string_value.h
index f15b26571e..13b3852a5d 100644
--- a/be/src/runtime/string_value.h
+++ b/be/src/runtime/string_value.h
@@ -90,6 +90,7 @@ struct StringValue {
StringValue(char* ptr, int len) : ptr(ptr), len(len) {}
StringValue(const char* ptr, int len) : ptr(const_cast<char*>(ptr)), len(len) {}
StringValue() : ptr(nullptr), len(0) {}
+ StringValue(const StringRef& str) : ptr(const_cast<char*>(str.data)), len(str.size) {}
/// Construct a StringValue from 's'. 's' must be valid for as long as
/// this object is valid.
diff --git a/be/src/util/bitmap_intersect.h b/be/src/util/bitmap_intersect.h
new file mode 100644
index 0000000000..dcda6ae5a5
--- /dev/null
+++ b/be/src/util/bitmap_intersect.h
@@ -0,0 +1,245 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+#include "runtime/string_value.h"
+#include "udf/udf.h"
+#include "util/bitmap_value.h"
+
+namespace doris {
+
+namespace detail {
+class Helper {
+public:
+ static const int DATETIME_PACKED_TIME_BYTE_SIZE = 8;
+ static const int DATETIME_TYPE_BYTE_SIZE = 4;
+ static const int DECIMAL_BYTE_SIZE = 16;
+
+ // get_val start
+ template <typename ValType, typename T>
+ static T get_val(const ValType& x) {
+ DCHECK(!x.is_null);
+ return x.val;
+ }
+
+ // serialize_size start
+ template <typename T>
+ static int32_t serialize_size(const T& v) {
+ return sizeof(T);
+ }
+
+ // write_to start
+ template <typename T>
+ static char* write_to(const T& v, char* dest) {
+ size_t type_size = sizeof(T);
+ memcpy(dest, &v, type_size);
+ dest += type_size;
+ return dest;
+ }
+
+ // read_from start
+ template <typename T>
+ static void read_from(const char** src, T* result) {
+ size_t type_size = sizeof(T);
+ memcpy(result, *src, type_size);
+ *src += type_size;
+ }
+};
+
+template <>
+inline StringValue Helper::get_val<StringVal>(const StringVal& x) {
+ DCHECK(!x.is_null);
+ return StringValue::from_string_val(x);
+}
+
+template <>
+inline DateTimeValue Helper::get_val<DateTimeVal>(const DateTimeVal& x) {
+ return DateTimeValue::from_datetime_val(x);
+}
+
+template <>
+inline DecimalV2Value Helper::get_val<DecimalV2Val>(const DecimalV2Val& x) {
+ return DecimalV2Value::from_decimal_val(x);
+}
+// get_val end
+
+template <>
+inline char* Helper::write_to<DateTimeValue>(const DateTimeValue& v, char* dest) {
+ DateTimeVal value;
+ v.to_datetime_val(&value);
+ *(int64_t*)dest = value.packed_time;
+ dest += DATETIME_PACKED_TIME_BYTE_SIZE;
+ *(int*)dest = value.type;
+ dest += DATETIME_TYPE_BYTE_SIZE;
+ return dest;
+}
+
+template <>
+inline char* Helper::write_to<DecimalV2Value>(const DecimalV2Value& v, char* dest) {
+ __int128 value = v.value();
+ memcpy(dest, &value, DECIMAL_BYTE_SIZE);
+ dest += DECIMAL_BYTE_SIZE;
+ return dest;
+}
+
+template <>
+inline char* Helper::write_to<StringValue>(const StringValue& v, char* dest) {
+ *(int32_t*)dest = v.len;
+ dest += 4;
+ memcpy(dest, v.ptr, v.len);
+ dest += v.len;
+ return dest;
+}
+// write_to end
+
+template <>
+inline int32_t Helper::serialize_size<DateTimeValue>(const DateTimeValue& v) {
+ return Helper::DATETIME_PACKED_TIME_BYTE_SIZE + Helper::DATETIME_TYPE_BYTE_SIZE;
+}
+
+template <>
+inline int32_t Helper::serialize_size<DecimalV2Value>(const DecimalV2Value& v) {
+ return Helper::DECIMAL_BYTE_SIZE;
+}
+
+template <>
+inline int32_t Helper::serialize_size<StringValue>(const StringValue& v) {
+ return v.len + 4;
+}
+// serialize_size end
+
+template <>
+inline void Helper::read_from<DateTimeValue>(const char** src, DateTimeValue* result) {
+ DateTimeVal value;
+ value.is_null = false;
+ value.packed_time = *(int64_t*)(*src);
+ *src += DATETIME_PACKED_TIME_BYTE_SIZE;
+ value.type = *(int*)(*src);
+ *src += DATETIME_TYPE_BYTE_SIZE;
+ *result = DateTimeValue::from_datetime_val(value);
+}
+
+template <>
+inline void Helper::read_from<DecimalV2Value>(const char** src, DecimalV2Value* result) {
+ __int128 v = 0;
+ memcpy(&v, *src, DECIMAL_BYTE_SIZE);
+ *src += DECIMAL_BYTE_SIZE;
+ *result = DecimalV2Value(v);
+}
+
+template <>
+inline void Helper::read_from<StringValue>(const char** src, StringValue* result) {
+ int32_t length = *(int32_t*)(*src);
+ *src += 4;
+ *result = StringValue((char*)*src, length);
+ *src += length;
+}
+// read_from end
+
+} // namespace detail
+
+// Calculate the intersection of two or more bitmaps
+// Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...)
+// Example: intersect_count(user_id, event, 'A', 'B', 'C'), meaning find the intersect count of user_id in all A/B/C 3 bitmaps
+// Todo(kks) Use Array type instead of variable arguments
+template <typename T>
+struct BitmapIntersect {
+public:
+ BitmapIntersect() = default;
+
+ explicit BitmapIntersect(const char* src) { deserialize(src); }
+
+ void add_key(const T key) {
+ BitmapValue empty_bitmap;
+ _bitmaps[key] = empty_bitmap;
+ }
+
+ void update(const T& key, const BitmapValue& bitmap) {
+ if (_bitmaps.find(key) != _bitmaps.end()) {
+ _bitmaps[key] |= bitmap;
+ }
+ }
+
+ void merge(const BitmapIntersect& other) {
+ for (auto& kv : other._bitmaps) {
+ if (_bitmaps.find(kv.first) != _bitmaps.end()) {
+ _bitmaps[kv.first] |= kv.second;
+ } else {
+ _bitmaps[kv.first] = kv.second;
+ }
+ }
+ }
+
+ // intersection
+ BitmapValue intersect() const {
+ BitmapValue result;
+ auto it = _bitmaps.begin();
+ result |= it->second;
+ it++;
+ for (; it != _bitmaps.end(); it++) {
+ result &= it->second;
+ }
+ return result;
+ }
+
+ // calculate the intersection for _bitmaps's bitmap values
+ int64_t intersect_count() const {
+ if (_bitmaps.empty()) {
+ return 0;
+ }
+ return intersect().cardinality();
+ }
+
+ // the serialize size
+ size_t size() {
+ size_t size = 4;
+ for (auto& kv : _bitmaps) {
+ size += detail::Helper::serialize_size(kv.first);
+ size += kv.second.getSizeInBytes();
+ }
+ return size;
+ }
+
+ //must call size() first
+ void serialize(char* dest) {
+ char* writer = dest;
+ *(int32_t*)writer = _bitmaps.size();
+ writer += 4;
+ for (auto& kv : _bitmaps) {
+ writer = detail::Helper::write_to(kv.first, writer);
+ kv.second.write(writer);
+ writer += kv.second.getSizeInBytes();
+ }
+ }
+
+ void deserialize(const char* src) {
+ const char* reader = src;
+ int32_t bitmaps_size = *(int32_t*)reader;
+ reader += 4;
+ for (int32_t i = 0; i < bitmaps_size; i++) {
+ T key;
+ detail::Helper::read_from(&reader, &key);
+ BitmapValue bitmap(reader);
+ reader += bitmap.getSizeInBytes();
+ _bitmaps[key] = bitmap;
+ }
+ }
+
+private:
+ std::map<T, BitmapValue> _bitmaps;
+};
+
+} // namespace doris
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index 5c4d5c7b36..fc50adca0a 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -40,6 +40,7 @@ set(VEC_FILES
aggregate_functions/aggregate_function_percentile_approx.cpp
aggregate_functions/aggregate_function_simple_factory.cpp
aggregate_functions/aggregate_function_java_udaf.h
+ aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
columns/collator.cpp
columns/column.cpp
columns/column_array.cpp
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
new file mode 100644
index 0000000000..470a6c8388
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h"
+
+#include <memory>
+
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/helpers.h"
+#include "vec/data_types/data_type_string.h"
+
+namespace doris::vectorized {
+
+template <template <typename> class Impl>
+AggregateFunctionPtr create_aggregate_function_orthogonal(const std::string& name,
+ const DataTypes& argument_types,
+ const Array& params,
+ const bool result_is_nullable) {
+ if (argument_types.empty()) {
+ LOG(WARNING) << "Incorrect number of arguments for aggregate function " << name;
+ return nullptr;
+ } else if (argument_types.size() == 1) {
+ // only used at AGGREGATE (merge finalize) for variadic function
+ // and for orthogonal_bitmap_union_count function
+ return std::make_shared<AggFunctionOrthBitmapFunc<Impl<StringValue>>>(argument_types);
+ } else {
+ const IDataType& argument_type = *argument_types[1];
+ AggregateFunctionPtr res(create_with_numeric_type<AggFunctionOrthBitmapFunc, Impl>(
+ argument_type, argument_types));
+
+ WhichDataType which(argument_type);
+
+ if (res) {
+ return res;
+ } else if (which.is_string_or_fixed_string()) {
+ return std::make_shared<AggFunctionOrthBitmapFunc<Impl<StringValue>>>(argument_types);
+ }
+ LOG(WARNING) << "Incorrect Type " << argument_type.get_name()
+ << " of arguments for aggregate function " << name;
+ return nullptr;
+ }
+}
+
+AggregateFunctionPtr create_aggregate_function_orthogonal_bitmap_intersect(
+ const std::string& name, const DataTypes& argument_types, const Array& parameters,
+ bool result_is_nullable) {
+ return create_aggregate_function_orthogonal<AggOrthBitMapIntersect>(
+ name, argument_types, parameters, result_is_nullable);
+}
+
+AggregateFunctionPtr create_aggregate_function_orthogonal_bitmap_intersect_count(
+ const std::string& name, const DataTypes& argument_types, const Array& parameters,
+ bool result_is_nullable) {
+ return create_aggregate_function_orthogonal<AggOrthBitMapIntersectCount>(
+ name, argument_types, parameters, result_is_nullable);
+}
+
+AggregateFunctionPtr create_aggregate_function_intersect_count(const std::string& name,
+ const DataTypes& argument_types,
+ const Array& parameters,
+ bool result_is_nullable) {
+ return create_aggregate_function_orthogonal<AggIntersectCount>(name, argument_types, parameters,
+ result_is_nullable);
+}
+
+AggregateFunctionPtr create_aggregate_function_orthogonal_bitmap_union_count(
+ const std::string& name, const DataTypes& argument_types, const Array& parameters,
+ const bool result_is_nullable) {
+ return create_aggregate_function_orthogonal<OrthBitmapUnionCountData>(
+ name, argument_types, parameters, result_is_nullable);
+}
+
+void register_aggregate_function_orthogonal_bitmap(AggregateFunctionSimpleFactory& factory) {
+ factory.register_function("orthogonal_bitmap_intersect",
+ create_aggregate_function_orthogonal_bitmap_intersect);
+
+ factory.register_function("orthogonal_bitmap_intersect_count",
+ create_aggregate_function_orthogonal_bitmap_intersect_count);
+
+ factory.register_function("orthogonal_bitmap_union_count",
+ create_aggregate_function_orthogonal_bitmap_union_count);
+
+ factory.register_function("intersect_count", create_aggregate_function_intersect_count);
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
new file mode 100644
index 0000000000..4f1fb69ec4
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exprs/bitmap_function.h"
+#include "util/bitmap_intersect.h"
+#include "util/bitmap_value.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_complex.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/assert_cast.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_bitmap.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+template <typename T>
+struct AggOrthBitmapBaseData {
+public:
+ using ColVecData = std::conditional_t<IsNumber<T>, ColumnVector<T>, ColumnString>;
+
+ void add(const IColumn** columns, size_t row_num) {
+ const auto& bitmap_col = static_cast<const ColumnBitmap&>(*columns[0]);
+ const auto& data_col = static_cast<const ColVecData&>(*columns[1]);
+ const auto& bitmap_value = bitmap_col.get_element(row_num);
+
+ if constexpr (IsNumber<T>) {
+ bitmap.update(data_col.get_element(row_num), bitmap_value);
+ } else {
+ bitmap.update(StringValue(data_col.get_data_at(row_num)), bitmap_value);
+ }
+ }
+
+ void init_add_key(const IColumn** columns, size_t row_num, int argument_size) {
+ if (first_init) {
+ DCHECK(argument_size > 1);
+ for (int idx = 2; idx < argument_size; ++idx) {
+ const auto& col = static_cast<const ColVecData&>(*columns[idx]);
+ if constexpr (IsNumber<T>) {
+ bitmap.add_key(col.get_element(row_num));
+ } else {
+ bitmap.add_key(StringValue(col.get_data_at(row_num)));
+ }
+ }
+ first_init = false;
+ }
+ }
+
+protected:
+ doris::BitmapIntersect<T> bitmap;
+ bool first_init = true;
+};
+
+template <typename T>
+struct AggOrthBitMapIntersect : public AggOrthBitmapBaseData<T> {
+public:
+ static constexpr auto name = "orthogonal_bitmap_intersect";
+
+ static DataTypePtr get_return_type() { return std::make_shared<DataTypeBitMap>(); }
+
+ void merge(const AggOrthBitMapIntersect& rhs) {
+ if (rhs.first_init) {
+ return;
+ }
+ result |= rhs.result;
+ }
+
+ void write(BufferWritable& buf) {
+ write_binary(AggOrthBitmapBaseData<T>::first_init, buf);
+ result = AggOrthBitmapBaseData<T>::bitmap.intersect();
+ DataTypeBitMap::serialize_as_stream(result, buf);
+ }
+
+ void read(BufferReadable& buf) {
+ read_binary(AggOrthBitmapBaseData<T>::first_init, buf);
+ DataTypeBitMap::deserialize_as_stream(result, buf);
+ }
+
+ void get(IColumn& to) const {
+ auto& column = static_cast<ColumnBitmap&>(to);
+ column.get_data().emplace_back(result);
+ }
+
+private:
+ BitmapValue result;
+};
+
+template <typename T>
+struct AggIntersectCount : public AggOrthBitmapBaseData<T> {
+public:
+ static constexpr auto name = "intersect_count";
+
+ static DataTypePtr get_return_type() { return std::make_shared<DataTypeInt64>(); }
+
+ void merge(const AggIntersectCount& rhs) {
+ if (rhs.first_init) {
+ return;
+ }
+ AggOrthBitmapBaseData<T>::bitmap.merge(rhs.bitmap);
+ }
+
+ void write(BufferWritable& buf) {
+ write_binary(AggOrthBitmapBaseData<T>::first_init, buf);
+ std::string data;
+ data.resize(AggOrthBitmapBaseData<T>::bitmap.size());
+ AggOrthBitmapBaseData<T>::bitmap.serialize(data.data());
+ write_binary(data, buf);
+ }
+
+ void read(BufferReadable& buf) {
+ read_binary(AggOrthBitmapBaseData<T>::first_init, buf);
+ std::string data;
+ read_binary(data, buf);
+ AggOrthBitmapBaseData<T>::bitmap.deserialize(data.data());
+ }
+
+ void get(IColumn& to) const {
+ auto& column = static_cast<ColumnVector<Int64>&>(to);
+ column.get_data().emplace_back(AggOrthBitmapBaseData<T>::bitmap.intersect_count());
+ }
+};
+
+template <typename T>
+struct AggOrthBitMapIntersectCount : public AggOrthBitmapBaseData<T> {
+public:
+ static constexpr auto name = "orthogonal_bitmap_intersect_count";
+
+ static DataTypePtr get_return_type() { return std::make_shared<DataTypeInt64>(); }
+
+ void merge(const AggOrthBitMapIntersectCount& rhs) {
+ if (rhs.first_init) {
+ return;
+ }
+ result += rhs.result;
+ }
+
+ void write(BufferWritable& buf) {
+ write_binary(AggOrthBitmapBaseData<T>::first_init, buf);
+ result = AggOrthBitmapBaseData<T>::bitmap.intersect_count();
+ write_binary(result, buf);
+ }
+
+ void read(BufferReadable& buf) {
+ read_binary(AggOrthBitmapBaseData<T>::first_init, buf);
+ read_binary(result, buf);
+ }
+
+ void get(IColumn& to) const {
+ auto& column = static_cast<ColumnVector<Int64>&>(to);
+ column.get_data().emplace_back(result);
+ }
+
+private:
+ Int64 result = 0;
+};
+
+template <typename T>
+struct OrthBitmapUnionCountData {
+ static constexpr auto name = "orthogonal_bitmap_union_count";
+
+ static DataTypePtr get_return_type() { return std::make_shared<DataTypeInt64>(); }
+    // Nothing needs to be done here, so this is an intentionally empty definition.
+ void init_add_key(const IColumn** columns, size_t row_num, int argument_size) {}
+
+ void add(const IColumn** columns, size_t row_num) {
+ const auto& column = static_cast<const ColumnBitmap&>(*columns[0]);
+ value |= column.get_data()[row_num];
+ }
+ void merge(const OrthBitmapUnionCountData& rhs) { result += rhs.result; }
+
+ void write(BufferWritable& buf) {
+ result = value.cardinality();
+ write_binary(result, buf);
+ }
+
+ void read(BufferReadable& buf) { read_binary(result, buf); }
+
+ void get(IColumn& to) const {
+ auto& column = static_cast<ColumnVector<Int64>&>(to);
+ column.get_data().emplace_back(result ? result : value.cardinality());
+ }
+
+private:
+ BitmapValue value;
+ int64_t result = 0;
+};
+
+template <typename Impl>
+class AggFunctionOrthBitmapFunc final
+ : public IAggregateFunctionDataHelper<Impl, AggFunctionOrthBitmapFunc<Impl>> {
+public:
+ String get_name() const override { return Impl::name; }
+
+ AggFunctionOrthBitmapFunc(const DataTypes& argument_types_)
+ : IAggregateFunctionDataHelper<Impl, AggFunctionOrthBitmapFunc<Impl>>(argument_types_,
+ {}),
+ _argument_size(argument_types_.size()) {}
+
+ DataTypePtr get_return_type() const override { return Impl::get_return_type(); }
+
+ void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+ Arena*) const override {
+ this->data(place).init_add_key(columns, row_num, _argument_size);
+ this->data(place).add(columns, row_num);
+ }
+
+ void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+ Arena*) const override {
+ this->data(place).merge(this->data(rhs));
+ }
+
+ void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+ this->data(const_cast<AggregateDataPtr>(place)).write(buf);
+ }
+
+ void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
+ Arena*) const override {
+ this->data(place).read(buf);
+ }
+
+ void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+ this->data(place).get(to);
+ }
+
+private:
+ int _argument_size;
+};
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
index 0afa9b97d3..badf756f8b 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
@@ -46,6 +46,7 @@ void register_aggregate_function_group_concat(AggregateFunctionSimpleFactory& fa
void register_aggregate_function_percentile(AggregateFunctionSimpleFactory& factory);
void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& factory);
void register_aggregate_function_percentile_approx(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_orthogonal_bitmap(AggregateFunctionSimpleFactory& factory);
AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() {
static std::once_flag oc;
static AggregateFunctionSimpleFactory instance;
@@ -68,6 +69,7 @@ AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() {
register_aggregate_function_percentile(instance);
register_aggregate_function_percentile_approx(instance);
register_aggregate_function_window_funnel(instance);
+ register_aggregate_function_orthogonal_bitmap(instance);
// If you register only the non-nullable version of a function and want the nullable variant added automatically, register it above this line.
register_aggregate_function_combinator_null(instance);
diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp
index ddf8a52ea3..78116bcbed 100644
--- a/be/test/exprs/bitmap_function_test.cpp
+++ b/be/test/exprs/bitmap_function_test.cpp
@@ -27,6 +27,7 @@
#include "exprs/aggregate_functions.h"
#include "exprs/anyval_util.h"
#include "testutil/function_utils.h"
+#include "util/bitmap_intersect.h"
#include "util/bitmap_value.h"
#include "util/logging.h"
@@ -266,10 +267,10 @@ void test_bitmap_intersect(FunctionContext* ctx, ValType key1, ValType key2) {
BitmapIntersect<ValueType> intersect2;
for (size_t i = 2; i < const_vals.size(); i++) {
ValType* arg = reinterpret_cast<ValType*>(const_vals[i]);
- intersect2.add_key(detail::get_val<ValType, ValueType>(*arg));
+ intersect2.add_key(detail::Helper::get_val<ValType, ValueType>(*arg));
}
- intersect2.update(detail::get_val<ValType, ValueType>(key1), bitmap1);
- intersect2.update(detail::get_val<ValType, ValueType>(key2), bitmap2);
+ intersect2.update(detail::Helper::get_val<ValType, ValueType>(key1), bitmap1);
+ intersect2.update(detail::Helper::get_val<ValType, ValueType>(key2), bitmap2);
StringVal expected = convert_bitmap_intersect_to_string(ctx, intersect2);
EXPECT_EQ(expected, intersect1);
diff --git a/docs/.vuepress/sidebar/en/docs.js b/docs/.vuepress/sidebar/en/docs.js
index db754625b0..f7ad6c58df 100644
--- a/docs/.vuepress/sidebar/en/docs.js
+++ b/docs/.vuepress/sidebar/en/docs.js
@@ -442,6 +442,7 @@ module.exports = [
"bitmap_xor",
"to_bitmap",
"bitmap_max",
+ "intersect_count",
"orthogonal_bitmap_intersect",
"orthogonal_bitmap_intersect_count",
"orthogonal_bitmap_union_count",
diff --git a/docs/.vuepress/sidebar/zh-CN/docs.js b/docs/.vuepress/sidebar/zh-CN/docs.js
index 5d18239c19..6fe177719f 100644
--- a/docs/.vuepress/sidebar/zh-CN/docs.js
+++ b/docs/.vuepress/sidebar/zh-CN/docs.js
@@ -442,6 +442,7 @@ module.exports = [
"bitmap_xor",
"to_bitmap",
"bitmap_max",
+ "intersect_count",
"orthogonal_bitmap_intersect",
"orthogonal_bitmap_intersect_count",
"orthogonal_bitmap_union_count",
diff --git a/docs/en/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md
new file mode 100644
index 0000000000..938865d3ac
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md
@@ -0,0 +1,57 @@
+---
+{
+"title": "intersect_count",
+"language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## intersect_count
+### description
+#### Syntax
+
+`BIGINT INTERSECT_COUNT(bitmap_column, column_to_filter, filter_values)`
+Aggregate function that calculates the number of elements in the intersection of two or more bitmaps.
+Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...)
+Example: intersect_count(user_id, event, 'A', 'B', 'C') returns the number of user_id values that appear in all three bitmaps selected by event = 'A', event = 'B' and event = 'C'.
+
+### example
+
+```
+MySQL [test_query_qa]> select dt,bitmap_to_string(user_id) from pv_bitmap where dt in (3,4);
++------+-----------------------------+
+| dt | bitmap_to_string(`user_id`) |
++------+-----------------------------+
+| 4 | 1,2,3 |
+| 3 | 1,2,3,4,5 |
++------+-----------------------------+
+2 rows in set (0.012 sec)
+
+MySQL [test_query_qa]> select intersect_count(user_id,dt,3,4) from pv_bitmap;
++----------------------------------------+
+| intersect_count(`user_id`, `dt`, 3, 4) |
++----------------------------------------+
+| 3 |
++----------------------------------------+
+1 row in set (0.014 sec)
+```
+
+### keywords
+
+ INTERSECT_COUNT,BITMAP
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md
new file mode 100644
index 0000000000..41f58f6da0
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/intersect_count.md
@@ -0,0 +1,56 @@
+---
+{
+"title": "intersect_count",
+"language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## intersect_count
+### description
+#### Syntax
+
+`BIGINT INTERSECT_COUNT(bitmap_column, column_to_filter, filter_values)`
+聚合函数,求bitmap交集大小的函数, 不要求数据分布正交
+第一个参数是Bitmap列,第二个参数是用来过滤的维度列,第三个参数是变长参数,含义是过滤维度列的不同取值
+
+### example
+
+```
+MySQL [test_query_qa]> select dt,bitmap_to_string(user_id) from pv_bitmap where dt in (3,4);
++------+-----------------------------+
+| dt | bitmap_to_string(`user_id`) |
++------+-----------------------------+
+| 4 | 1,2,3 |
+| 3 | 1,2,3,4,5 |
++------+-----------------------------+
+2 rows in set (0.012 sec)
+
+MySQL [test_query_qa]> select intersect_count(user_id,dt,3,4) from pv_bitmap;
++----------------------------------------+
+| intersect_count(`user_id`, `dt`, 3, 4) |
++----------------------------------------+
+| 3 |
++----------------------------------------+
+1 row in set (0.014 sec)
+```
+
+### keywords
+
+ INTERSECT_COUNT,BITMAP
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index 594bd50c09..a9ca2504a1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -559,20 +559,23 @@ public class FunctionCallExpr extends Expr {
throw new AnalysisException("BITMAP_UNION_INT params only support TINYINT or SMALLINT or INT");
}
- if (fnName.getFunction().equalsIgnoreCase(FunctionSet.INTERSECT_COUNT)) {
+ if (fnName.getFunction().equalsIgnoreCase(FunctionSet.INTERSECT_COUNT) || fnName.getFunction()
+ .equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT) || fnName.getFunction()
+ .equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT_COUNT)) {
if (children.size() <= 2) {
- throw new AnalysisException("intersect_count(bitmap_column, column_to_filter, filter_values) "
+ throw new AnalysisException(fnName + "(bitmap_column, column_to_filter, filter_values) "
+ "function requires at least three parameters");
}
Type inputType = getChild(0).getType();
if (!inputType.isBitmapType()) {
- throw new AnalysisException("intersect_count function first argument should be of BITMAP type, but was " + inputType);
+ // fnName is concatenated without a separator, so the literal must begin with a space.
+ throw new AnalysisException(
+ fnName + " function first argument should be of BITMAP type, but was " + inputType);
}
for (int i = 2; i < children.size(); i++) {
if (!getChild(i).isConstant()) {
- throw new AnalysisException("intersect_count function filter_values arg must be constant");
+ throw new AnalysisException(fnName + " function filter_values arg must be constant");
}
}
return;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
index 98d0488614..efa1c0c2f1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
@@ -46,8 +46,11 @@ public class AggregateFunction extends Function {
private static final Logger LOG = LogManager.getLogger(AggregateFunction.class);
- public static ImmutableSet<String> NOT_NULLABLE_AGGREGATE_FUNCTION_NAME_SET =
- ImmutableSet.of("row_number", "rank", "dense_rank", "multi_distinct_count", "multi_distinct_sum", "hll_union_agg", "hll_union", "bitmap_union", "bitmap_intersect", FunctionSet.COUNT, "approx_count_distinct", "ndv", FunctionSet.BITMAP_UNION_INT, FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize", FunctionSet.WINDOW_FUNNEL);
+ public static ImmutableSet<String> NOT_NULLABLE_AGGREGATE_FUNCTION_NAME_SET = ImmutableSet.of("row_number", "rank",
+ "dense_rank", "multi_distinct_count", "multi_distinct_sum", "hll_union_agg", "hll_union", "bitmap_union",
+ "bitmap_intersect", "orthogonal_bitmap_intersect", "orthogonal_bitmap_intersect_count", "intersect_count",
+ "orthogonal_bitmap_union_count", FunctionSet.COUNT, "approx_count_distinct", "ndv",
+ FunctionSet.BITMAP_UNION_INT, FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize", FunctionSet.WINDOW_FUNNEL);
public static ImmutableSet<String> ALWAYS_NULLABLE_AGGREGATE_FUNCTION_NAME_SET =
ImmutableSet.of("stddev_samp", "variance_samp", "var_samp", "percentile_approx");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 88808abe89..5b263589c3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -1684,6 +1684,14 @@ public class FunctionSet<T> {
BITMAP_INTERSECT_FINALIZE_SYMBOL.get(t),
true, false, true));
+ // VEC_INTERSECT_COUNT
+ addBuiltin(
+ AggregateFunction.createBuiltin(INTERSECT_COUNT, Lists.newArrayList(Type.BITMAP, t, t), Type.BIGINT,
+ Type.VARCHAR, true, BITMAP_INTERSECT_INIT_SYMBOL.get(t),
+ BITMAP_INTERSECT_UPDATE_SYMBOL.get(t), BITMAP_INTERSECT_MERGE_SYMBOL.get(t),
+ BITMAP_INTERSECT_SERIALIZE_SYMBOL.get(t), null, null,
+ BITMAP_INTERSECT_FINALIZE_SYMBOL.get(t), true, false, true, true));
+
// HLL_UNION_AGG
addBuiltin(AggregateFunction.createBuiltin("hll_union_agg",
Lists.newArrayList(t), Type.BIGINT, Type.VARCHAR,
@@ -2042,6 +2050,15 @@ public class FunctionSet<T> {
"",
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_count_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
true, false, true));
+
+ //vec ORTHOGONAL_BITMAP_INTERSECT and ORTHOGONAL_BITMAP_INTERSECT_COUNT
+ addBuiltin(
+ AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_INTERSECT, Lists.newArrayList(Type.BITMAP, t, t),
+ Type.BITMAP, Type.BITMAP, true, "", "", "", "", "", "", "", true, false, true, true));
+
+ addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_INTERSECT_COUNT,
+ Lists.newArrayList(Type.BITMAP, t, t), Type.BIGINT, Type.BITMAP, true, "", "", "", "", "", "", "",
+ true, false, true, true));
}
// bitmap
addBuiltin(AggregateFunction.createBuiltin(BITMAP_UNION, Lists.newArrayList(Type.BITMAP),
@@ -2100,6 +2117,10 @@ public class FunctionSet<T> {
null,
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_count_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
true, true, true));
+ // ORTHOGONAL_BITMAP_UNION_COUNT vectorized
+ addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_UNION_COUNT, Lists.newArrayList(Type.BITMAP),
+ Type.BIGINT, Type.BITMAP, "", "", "", "", null, null, "", true, true, true, true));
+
// TODO(ml): supply function symbol
addBuiltin(AggregateFunction.createBuiltin(BITMAP_INTERSECT, Lists.newArrayList(Type.BITMAP),
Type.BITMAP, Type.VARCHAR,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org