You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2021/11/04 04:14:57 UTC
[incubator-doris] branch master updated: [Function] Add bitmap
function bitmap_subset_limit (#6980)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 599ecb1 [Function] Add bitmap function bitmap_subset_limit (#6980)
599ecb1 is described below
commit 599ecb1f30d062199dfab270c116de0325df2e55
Author: pengxiangyu <di...@163.com>
AuthorDate: Thu Nov 4 12:14:47 2021 +0800
[Function] Add bitmap function bitmap_subset_limit (#6980)
Add bitmap function bitmap_subset_limit.
This function will return subset in specified index.
---
be/src/exprs/bitmap_function.cpp | 19 +++++++
be/src/exprs/bitmap_function.h | 2 +
be/src/util/bitmap_value.h | 21 ++++++++
be/test/exprs/bitmap_function_test.cpp | 55 ++++++++++++++++++++
be/test/util/bitmap_value_test.cpp | 19 +++++++
docs/.vuepress/sidebar/en.js | 1 +
docs/.vuepress/sidebar/zh-CN.js | 1 +
.../bitmap-functions/bitmap_subset_limit.md | 59 ++++++++++++++++++++++
.../bitmap-functions/bitmap_subset_limit.md | 59 ++++++++++++++++++++++
gensrc/script/doris_builtins_functions.py | 4 +-
10 files changed, 239 insertions(+), 1 deletion(-)
diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 6abbee4..ccab0c5 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -671,6 +671,25 @@ StringVal BitmapFunctions::bitmap_subset_in_range(FunctionContext* ctx, const St
return serialize(ctx, &ret_bitmap);
}
+// Builds the subset of `src` made of the first `cardinality_limit` elements
+// that are >= `range_start`, and returns it in serialized form.
+//
+// Returns NULL when any argument is NULL, or when `range_start` or
+// `cardinality_limit` is negative.
+StringVal BitmapFunctions::bitmap_subset_limit(FunctionContext* ctx, const StringVal& src,
+        const BigIntVal& range_start, const BigIntVal& cardinality_limit) {
+    // Short-circuit evaluation guarantees `.val` is only read for non-NULL arguments.
+    if (src.is_null || range_start.is_null || cardinality_limit.is_null ||
+        range_start.val < 0 || cardinality_limit.val < 0) {
+        return StringVal::null();
+    }
+
+    BitmapValue ret_bitmap;
+    if (src.len == 0) {
+        // A zero-length StringVal is read as a pointer to a live BitmapValue
+        // rather than serialized bytes. Apply the same start/limit filter to
+        // it instead of copying the whole bitmap into the result unfiltered.
+        auto* bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
+        bitmap->sub_limit(range_start.val, cardinality_limit.val, &ret_bitmap);
+    } else {
+        // Otherwise src.ptr holds a serialized bitmap that must be decoded first.
+        BitmapValue bitmap(reinterpret_cast<char*>(src.ptr));
+        bitmap.sub_limit(range_start.val, cardinality_limit.val, &ret_bitmap);
+    }
+
+    return serialize(ctx, &ret_bitmap);
+}
+
template void BitmapFunctions::bitmap_update_int<TinyIntVal>(FunctionContext* ctx,
const TinyIntVal& src, StringVal* dst);
template void BitmapFunctions::bitmap_update_int<SmallIntVal>(FunctionContext* ctx,
diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h
index 89cf0d9..5b166a0 100644
--- a/be/src/exprs/bitmap_function.h
+++ b/be/src/exprs/bitmap_function.h
@@ -106,6 +106,8 @@ public:
static BigIntVal bitmap_max(FunctionContext* ctx, const StringVal& str);
static StringVal bitmap_subset_in_range(FunctionContext* ctx, const StringVal& src,
const BigIntVal& range_start, const BigIntVal& range_end);
+ static StringVal bitmap_subset_limit(FunctionContext* ctx, const StringVal& src,
+ const BigIntVal& range_start, const BigIntVal& cardinality_limit); // subset of src: elements >= range_start, capped at cardinality_limit elements
};
} // namespace doris
#endif //DORIS_BE_SRC_QUERY_EXPRS_BITMAP_FUNCTION_H
diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h
index af1670e..9bf6f65 100644
--- a/be/src/util/bitmap_value.h
+++ b/be/src/util/bitmap_value.h
@@ -1470,6 +1470,27 @@ public:
return count;
}
+    /**
+     * Copy a bounded slice of this bitmap into ret_bitmap.
+     * Elements are visited in the bitmap's iteration order; those smaller than
+     * range_start are skipped, and the scan stops at the first qualifying
+     * element seen after cardinality_limit elements have already been copied.
+     * @param range_start smallest value that may be copied
+     * @param cardinality_limit maximum number of elements to copy
+     * @param ret_bitmap destination; elements are added to it, it is not cleared first
+     * @return the number of elements copied, which may be less than cardinality_limit
+     */
+    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit, BitmapValue* ret_bitmap) {
+        int64_t added = 0;
+        for (auto iter = _bitmap.begin(); iter != _bitmap.end(); ++iter) {
+            const bool below_start = *iter < range_start;
+            if (!below_start) {
+                if (added >= cardinality_limit) {
+                    break;
+                }
+                ret_bitmap->add(*iter);
+                ++added;
+            }
+        }
+        return added;
+    }
private:
void _convert_to_smaller_type() {
diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp
index 4379858..c753383 100644
--- a/be/test/exprs/bitmap_function_test.cpp
+++ b/be/test/exprs/bitmap_function_test.cpp
@@ -754,6 +754,61 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) {
}
+TEST_F(BitmapFunctionsTest, bitmap_subset_limit) {
+ // NULL source bitmap: the result must be NULL
+ StringVal res = BitmapFunctions::bitmap_subset_limit(ctx, StringVal::null(), BigIntVal(1), BigIntVal(3));
+ ASSERT_TRUE(res.is_null);
+
+ // empty source bitmap: the subset is empty as well
+ BitmapValue bitmap0;
+ StringVal empty_str = convert_bitmap_to_string(ctx, bitmap0);
+ res = BitmapFunctions::bitmap_subset_limit(ctx, empty_str, BigIntVal(10), BigIntVal(20));
+ BigIntVal result = BitmapFunctions::bitmap_count(ctx, res);
+ ASSERT_EQ(BigIntVal(0), result);
+
+ // populated bitmap (values inserted out of order): only the subset cardinality is checked below
+ BitmapValue bitmap1({0,1,2,3,4,5,6,7,45,47,49,43,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500});
+
+ StringVal bitmap_src = convert_bitmap_to_string(ctx, bitmap1);
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(4), BigIntVal(10));
+ result = BitmapFunctions::bitmap_count(ctx, res);
+ ASSERT_EQ(BigIntVal(10), result); // more than ten values are >= 4, so the limit caps the result
+
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(0), BigIntVal(1));
+ result = BitmapFunctions::bitmap_count(ctx, res);
+ ASSERT_EQ(BigIntVal(1), result); // limit of one element
+
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(35), BigIntVal(10));
+ result = BitmapFunctions::bitmap_count(ctx, res);
+ ASSERT_EQ(BigIntVal(7), result); // only 43,45,47,49,100,200,500 are >= 35
+
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(31), DecimalV2Value::MAX_INT64);
+ result = BitmapFunctions::bitmap_count(ctx, res);
+ ASSERT_EQ(BigIntVal(10), result); // limit exceeds the data: 31,32,33,43,45,47,49,100,200,500
+
+ // invalid arguments
+ // a negative range_start and/or cardinality_limit yields NULL
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(-10), BigIntVal(20));
+ ASSERT_TRUE(res.is_null);
+
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(10), BigIntVal(-20));
+ ASSERT_TRUE(res.is_null);
+
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(-10), BigIntVal(-20));
+ ASSERT_TRUE(res.is_null);
+
+ // a NULL range_start and/or cardinality_limit yields NULL
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal::null(), BigIntVal(20));
+ ASSERT_TRUE(res.is_null);
+
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(10), BigIntVal::null());
+ ASSERT_TRUE(res.is_null);
+
+ res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal::null(), BigIntVal::null());
+ ASSERT_TRUE(res.is_null);
+
+}
+
} // namespace doris
int main(int argc, char** argv) {
diff --git a/be/test/util/bitmap_value_test.cpp b/be/test/util/bitmap_value_test.cpp
index dc4a0f3..9973205 100644
--- a/be/test/util/bitmap_value_test.cpp
+++ b/be/test/util/bitmap_value_test.cpp
@@ -307,6 +307,25 @@ TEST(BitmapValueTest, bitmap_to_string) {
ASSERT_STREQ("1,2", empty.to_string().c_str());
}
+// Checks both the count returned by sub_limit() and the rendered contents of
+// the subset it fills, for several start/limit combinations.
+TEST(BitmapValueTest, sub_limit) {
+    BitmapValue source({1,2,3,10,11,5,6,7,8,9});
+
+    {
+        // Start below the smallest value; the limit cuts the result short.
+        BitmapValue from_zero;
+        ASSERT_EQ(5, source.sub_limit(0, 5, &from_zero));
+        ASSERT_STREQ("1,2,3,5,6", from_zero.to_string().c_str());
+    }
+
+    {
+        // Fewer values remain than the limit allows.
+        BitmapValue tail;
+        ASSERT_EQ(6, source.sub_limit(6, 10, &tail));
+        ASSERT_STREQ("6,7,8,9,10,11", tail.to_string().c_str());
+    }
+
+    {
+        // Start on a value that is present in the bitmap.
+        BitmapValue from_five;
+        ASSERT_EQ(3, source.sub_limit(5, 3, &from_five));
+        ASSERT_STREQ("5,6,7", from_five.to_string().c_str());
+    }
+
+    {
+        // The selected window spans the missing value 4.
+        BitmapValue across_gap;
+        ASSERT_EQ(5, source.sub_limit(2, 5, &across_gap));
+        ASSERT_STREQ("2,3,5,6,7", across_gap.to_string().c_str());
+    }
+}
+
TEST(BitmapValueTest, bitmap_single_convert) {
BitmapValue bitmap;
ASSERT_STREQ("", bitmap.to_string().c_str());
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index e6c4e3b..39be10b 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -423,6 +423,7 @@ module.exports = [
"bitmap_and_not",
"bitmap_and_not_count",
"bitmap_subset_in_range",
+ "bitmap_subset_limit",
"bitmap_to_string",
"bitmap_union",
"bitmap_xor",
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index a21315d..837c04e 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -427,6 +427,7 @@ module.exports = [
"bitmap_and_not",
"bitmap_and_not_count",
"bitmap_subset_in_range",
+ "bitmap_subset_limit",
"bitmap_to_string",
"bitmap_union",
"bitmap_xor",
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md
new file mode 100644
index 0000000..0e4948a
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md
@@ -0,0 +1,59 @@
+---
+{
+ "title": "bitmap_subset_limit",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_subset_limit
+
+## Description
+
+### Syntax
+
+`BITMAP BITMAP_SUBSET_LIMIT(BITMAP src, BIGINT range_start, BIGINT cardinality_limit)`
+
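+Creates a subset of the BITMAP `src`: starting from the first element that is not less than `range_start`, it returns at most `cardinality_limit` elements. Returns NULL if any argument is NULL, or if `range_start` or `cardinality_limit` is negative.
+range_start: start value of the range (inclusive)
+cardinality_limit: upper limit on the number of elements in the subset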
+
+## example
+
+```
+mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 0, 3)) value;
++-----------+
+| value |
++-----------+
+| 1,2,3 |
++-----------+
+
+mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 4, 3)) value;
++-------+
+| value |
++-------+
+| 4,5 |
++-------+
+```
+
+## keyword
+
+ BITMAP_SUBSET_LIMIT,BITMAP_SUBSET,BITMAP
diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md
new file mode 100644
index 0000000..be905b5
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md
@@ -0,0 +1,59 @@
+---
+{
+ "title": "bitmap_subset_limit",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_subset_limit
+
+## Description
+
+### Syntax
+
+`BITMAP BITMAP_SUBSET_LIMIT(BITMAP src, BIGINT range_start, BIGINT cardinality_limit)`
+
+生成 src 的子 BITMAP, 从不小于 range_start 的位置开始,大小限制为 cardinality_limit 。
+range_start:范围起始点(含)
+cardinality_limit：子BITMAP基数上限
+
+## example
+
+```
+mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 0, 3)) value;
++-----------+
+| value |
++-----------+
+| 1,2,3 |
++-----------+
+
+mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 4, 3)) value;
++-------+
+| value |
++-------+
+| 4,5 |
++-------+
+```
+
+## keyword
+
+ BITMAP_SUBSET_LIMIT,BITMAP_SUBSET,BITMAP
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 87d0e0c..6c990b5 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1220,13 +1220,15 @@ visible_functions = [
[['bitmap_subset_in_range'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'],
'_ZN5doris15BitmapFunctions22bitmap_subset_in_rangeEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_',
'', '', 'vec', ''],
+ [['bitmap_subset_limit'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'],
+ '_ZN5doris15BitmapFunctions19bitmap_subset_limitEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_',
+ '', '', 'vec', ''],
[['bitmap_and_count'], 'BIGINT', ['BITMAP','BITMAP'],
'_ZN5doris15BitmapFunctions16bitmap_and_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
'', '', '', ''],
[['bitmap_or_count'], 'BIGINT', ['BITMAP','BITMAP'],
'_ZN5doris15BitmapFunctions15bitmap_or_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
'', '', '', ''],
-
# hash functions
[['murmur_hash3_32'], 'INT', ['VARCHAR', '...'],
'_ZN5doris13HashFunctions15murmur_hash3_32EPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org