You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2021/11/04 04:14:57 UTC

[incubator-doris] branch master updated: [Function] Add bitmap function bitmap_subset_limit (#6980)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 599ecb1  [Function] Add bitmap function bitmap_subset_limit (#6980)
599ecb1 is described below

commit 599ecb1f30d062199dfab270c116de0325df2e55
Author: pengxiangyu <di...@163.com>
AuthorDate: Thu Nov 4 12:14:47 2021 +0800

    [Function] Add bitmap function bitmap_subset_limit (#6980)
    
    Add bitmap function bitmap_subset_limit.
    This function returns a subset of the bitmap: at most cardinality_limit values, starting from the first value that is not less than range_start.
---
 be/src/exprs/bitmap_function.cpp                   | 19 +++++++
 be/src/exprs/bitmap_function.h                     |  2 +
 be/src/util/bitmap_value.h                         | 21 ++++++++
 be/test/exprs/bitmap_function_test.cpp             | 55 ++++++++++++++++++++
 be/test/util/bitmap_value_test.cpp                 | 19 +++++++
 docs/.vuepress/sidebar/en.js                       |  1 +
 docs/.vuepress/sidebar/zh-CN.js                    |  1 +
 .../bitmap-functions/bitmap_subset_limit.md        | 59 ++++++++++++++++++++++
 .../bitmap-functions/bitmap_subset_limit.md        | 59 ++++++++++++++++++++++
 gensrc/script/doris_builtins_functions.py          |  4 +-
 10 files changed, 239 insertions(+), 1 deletion(-)

diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 6abbee4..ccab0c5 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -671,6 +671,25 @@ StringVal BitmapFunctions::bitmap_subset_in_range(FunctionContext* ctx, const St
     return serialize(ctx, &ret_bitmap);
 }
 
+StringVal BitmapFunctions::bitmap_subset_limit(FunctionContext* ctx, const StringVal& src,
+        const BigIntVal& range_start, const BigIntVal& cardinality_limit) {
+    if (src.is_null || range_start.is_null || cardinality_limit.is_null) { // any NULL argument yields a NULL result
+        return StringVal::null();
+    }
+    if (range_start.val < 0 || cardinality_limit.val < 0) { // a negative start or limit also yields NULL
+        return StringVal::null();
+    }
+    BitmapValue ret_bitmap;
+    if (src.len == 0) { // len == 0: src.ptr is reinterpreted as a BitmapValue object and copied as-is
+        ret_bitmap = *reinterpret_cast<BitmapValue*>(src.ptr); // NOTE(review): range_start/cardinality_limit are not applied on this path - confirm intended
+    } else { // otherwise a BitmapValue is constructed from the bytes at src.ptr (presumably the serialized form)
+        BitmapValue bitmap = BitmapValue((char*)src.ptr);
+        bitmap.sub_limit(range_start.val, cardinality_limit.val, &ret_bitmap); // returned count is ignored here
+    }
+
+    return serialize(ctx, &ret_bitmap);
+}
+
 template void BitmapFunctions::bitmap_update_int<TinyIntVal>(FunctionContext* ctx,
                                                              const TinyIntVal& src, StringVal* dst);
 template void BitmapFunctions::bitmap_update_int<SmallIntVal>(FunctionContext* ctx,
diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h
index 89cf0d9..5b166a0 100644
--- a/be/src/exprs/bitmap_function.h
+++ b/be/src/exprs/bitmap_function.h
@@ -106,6 +106,8 @@ public:
     static BigIntVal bitmap_max(FunctionContext* ctx, const StringVal& str);
     static StringVal bitmap_subset_in_range(FunctionContext* ctx, const StringVal& src, 
                                             const BigIntVal& range_start, const BigIntVal& range_end);
+    static StringVal bitmap_subset_limit(FunctionContext* ctx, const StringVal& src,
+                                         const BigIntVal& range_start, const BigIntVal& cardinality_limit);
 };
 } // namespace doris
 #endif //DORIS_BE_SRC_QUERY_EXPRS_BITMAP_FUNCTION_H
diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h
index af1670e..9bf6f65 100644
--- a/be/src/util/bitmap_value.h
+++ b/be/src/util/bitmap_value.h
@@ -1470,6 +1470,27 @@ public:
         return count;
     }
 
+    /**
+     * Add to ret_bitmap, in iteration order, up to cardinality_limit values of this bitmap that are >= range_start
+     * @param range_start inclusive lower bound; values smaller than this are skipped
+     * @param cardinality_limit maximum number of values to add to ret_bitmap
+     * @return the number of values actually added, which may be less than cardinality_limit
+     */
+    int64_t sub_limit(const int64_t& range_start, const int64_t& cardinality_limit, BitmapValue* ret_bitmap) {
+        int64_t count = 0;
+        for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) {
+            if (*it < range_start) {
+                continue;
+            }
+            if (count < cardinality_limit) {
+                ret_bitmap->add(*it);
+                ++count;
+            } else {
+                break;
+            }
+        }
+        return count;
+    }
 
 private:
     void _convert_to_smaller_type() {
diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp
index 4379858..c753383 100644
--- a/be/test/exprs/bitmap_function_test.cpp
+++ b/be/test/exprs/bitmap_function_test.cpp
@@ -754,6 +754,61 @@ TEST_F(BitmapFunctionsTest, bitmap_subset_in_range) {
 
 }
 
+TEST_F(BitmapFunctionsTest, bitmap_subset_limit) {
+    // a null source bitmap must produce a null result
+    StringVal res = BitmapFunctions::bitmap_subset_limit(ctx, StringVal::null(), BigIntVal(1), BigIntVal(3));
+    ASSERT_TRUE(res.is_null);
+
+    // an empty source bitmap must produce a result whose count is 0
+    BitmapValue bitmap0;
+    StringVal empty_str = convert_bitmap_to_string(ctx, bitmap0);
+    res = BitmapFunctions::bitmap_subset_limit(ctx, empty_str, BigIntVal(10), BigIntVal(20));
+    BigIntVal result = BitmapFunctions::bitmap_count(ctx, res);
+    ASSERT_EQ(BigIntVal(0), result);
+
+    // normal cases: values 0..33 plus 43,45,47,49,100,200,500 (inserted out of order)
+    BitmapValue bitmap1({0,1,2,3,4,5,6,7,45,47,49,43,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500});
+
+    StringVal bitmap_src = convert_bitmap_to_string(ctx, bitmap1);
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(4), BigIntVal(10));
+    result = BitmapFunctions::bitmap_count(ctx, res);
+    ASSERT_EQ(BigIntVal(10), result); // more than 10 values are >= 4, so the limit caps the result
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(0), BigIntVal(1));
+    result = BitmapFunctions::bitmap_count(ctx, res);
+    ASSERT_EQ(BigIntVal(1), result);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(35), BigIntVal(10));
+    result = BitmapFunctions::bitmap_count(ctx, res);
+    ASSERT_EQ(BigIntVal(7), result); // only 7 values are >= 35: 43,45,47,49,100,200,500
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(31), DecimalV2Value::MAX_INT64);
+    result = BitmapFunctions::bitmap_count(ctx, res);
+    ASSERT_EQ(BigIntVal(10), result); // 10 values are >= 31: 31,32,33,43,45,47,49,100,200,500
+
+    // abnormal cases
+    // a negative range_start and/or cardinality_limit must produce a null result
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(-10), BigIntVal(20));
+    ASSERT_TRUE(res.is_null);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(10), BigIntVal(-20));
+    ASSERT_TRUE(res.is_null);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(-10), BigIntVal(-20));
+    ASSERT_TRUE(res.is_null);
+
+    // a null range_start and/or cardinality_limit must produce a null result
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal::null(), BigIntVal(20));
+    ASSERT_TRUE(res.is_null);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal(10), BigIntVal::null());
+    ASSERT_TRUE(res.is_null);
+
+    res = BitmapFunctions::bitmap_subset_limit(ctx, bitmap_src, BigIntVal::null(), BigIntVal::null());
+    ASSERT_TRUE(res.is_null);
+
+}
+
 } // namespace doris
 
 int main(int argc, char** argv) {
diff --git a/be/test/util/bitmap_value_test.cpp b/be/test/util/bitmap_value_test.cpp
index dc4a0f3..9973205 100644
--- a/be/test/util/bitmap_value_test.cpp
+++ b/be/test/util/bitmap_value_test.cpp
@@ -307,6 +307,25 @@ TEST(BitmapValueTest, bitmap_to_string) {
     ASSERT_STREQ("1,2", empty.to_string().c_str());
 }
 
+TEST(BitmapValueTest, sub_limit) {
+    BitmapValue bitmap({1,2,3,10,11,5,6,7,8,9}); // inserted out of order; 4 is deliberately absent
+    BitmapValue ret_bitmap1;
+    ASSERT_EQ(5, bitmap.sub_limit(0, 5, &ret_bitmap1)); // start below the smallest value: first 5 values
+    ASSERT_STREQ("1,2,3,5,6", ret_bitmap1.to_string().c_str());
+
+    BitmapValue ret_bitmap2;
+    ASSERT_EQ(6, bitmap.sub_limit(6, 10, &ret_bitmap2)); // only 6 values are >= 6, fewer than the limit of 10
+    ASSERT_STREQ("6,7,8,9,10,11", ret_bitmap2.to_string().c_str());
+
+    BitmapValue ret_bitmap3;
+    ASSERT_EQ(3, bitmap.sub_limit(5, 3, &ret_bitmap3)); // the start value itself is included
+    ASSERT_STREQ("5,6,7", ret_bitmap3.to_string().c_str());
+
+    BitmapValue ret_bitmap4;
+    ASSERT_EQ(5, bitmap.sub_limit(2, 5, &ret_bitmap4)); // the missing value 4 does not count toward the limit
+    ASSERT_STREQ("2,3,5,6,7", ret_bitmap4.to_string().c_str());
+}
+
 TEST(BitmapValueTest, bitmap_single_convert) {
     BitmapValue bitmap;
     ASSERT_STREQ("", bitmap.to_string().c_str());
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index e6c4e3b..39be10b 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -423,6 +423,7 @@ module.exports = [
               "bitmap_and_not",
               "bitmap_and_not_count",
               "bitmap_subset_in_range",
+              "bitmap_subset_limit",
               "bitmap_to_string",
               "bitmap_union",
               "bitmap_xor",
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index a21315d..837c04e 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -427,6 +427,7 @@ module.exports = [
               "bitmap_and_not",
               "bitmap_and_not_count",
               "bitmap_subset_in_range",
+              "bitmap_subset_limit",
               "bitmap_to_string",
               "bitmap_union",
               "bitmap_xor",
diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md
new file mode 100644
index 0000000..0e4948a
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md
@@ -0,0 +1,59 @@
+---
+{
+    "title": "bitmap_subset_limit",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_subset_limit
+
+## Description
+
+### Syntax
+
+`BITMAP BITMAP_SUBSET_LIMIT(BITMAP src, BIGINT range_start, BIGINT cardinality_limit)`
+
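+Create a subset of the BITMAP: starting from the first element that is not less than `range_start`, take at most `cardinality_limit` elements.
+- `range_start`: start value of the range (inclusive)
+- `cardinality_limit`: upper limit on the number of elements in the subset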
+
+## example
+
+```
+mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 0, 3)) value;
++-----------+
+| value     |
++-----------+
+| 1,2,3     |
++-----------+
+
+mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 4, 3)) value;
++-------+
+| value |
++-------+
+| 4,5   |
++-------+
+```
+
+## keyword
+
+    BITMAP_SUBSET_LIMIT,BITMAP_SUBSET,BITMAP
diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md
new file mode 100644
index 0000000..be905b5
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md
@@ -0,0 +1,59 @@
+---
+{
+    "title": "bitmap_subset_limit",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_subset_limit
+
+## Description
+
+### Syntax
+
+`BITMAP BITMAP_SUBSET_LIMIT(BITMAP src, BIGINT range_start, BIGINT cardinality_limit)`
+
+生成 src 的子 BITMAP, 从不小于 range_start 的位置开始,大小限制为 cardinality_limit 。
+range_start:范围起始点(含)
+cardinality_limit:子 BITMAP 基数上限
+
+## example
+
+```
+mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 0, 3)) value;
++-----------+
+| value     |
++-----------+
+| 1,2,3     |
++-----------+
+
+mysql> select bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 4, 3)) value;
++-------+
+| value |
++-------+
+| 4,5   |
++-------+
+```
+
+## keyword
+
+    BITMAP_SUBSET_LIMIT,BITMAP_SUBSET,BITMAP
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 87d0e0c..6c990b5 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1220,13 +1220,15 @@ visible_functions = [
     [['bitmap_subset_in_range'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'],
         '_ZN5doris15BitmapFunctions22bitmap_subset_in_rangeEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_',
         '', '', 'vec', ''],
+    [['bitmap_subset_limit'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'],
+        '_ZN5doris15BitmapFunctions19bitmap_subset_limitEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_',
+        '', '', 'vec', ''],
     [['bitmap_and_count'], 'BIGINT', ['BITMAP','BITMAP'],
         '_ZN5doris15BitmapFunctions16bitmap_and_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '', '', '', ''],
     [['bitmap_or_count'], 'BIGINT', ['BITMAP','BITMAP'],
         '_ZN5doris15BitmapFunctions15bitmap_or_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '', '', '', ''],
-        
     # hash functions
     [['murmur_hash3_32'], 'INT', ['VARCHAR', '...'],
         '_ZN5doris13HashFunctions15murmur_hash3_32EPN9doris_udf15FunctionContextEiPKNS1_9StringValE',

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org