You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2019/11/18 12:37:07 UTC

[incubator-doris] branch master updated: Support bitmap_empty function (#2227)

This is an automated email from the ASF dual-hosted git repository.

zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d8cfbbe  Support bitmap_empty function (#2227)
d8cfbbe is described below

commit d8cfbbedf740720d4755eb4489902cc240dcdd5f
Author: kangkaisen <ka...@apache.org>
AuthorDate: Mon Nov 18 20:37:00 2019 +0800

    Support bitmap_empty function (#2227)
---
 be/src/exprs/bitmap_function.cpp                   |  8 ++++++++
 be/src/exprs/bitmap_function.h                     |  1 +
 be/test/exprs/bitmap_function_test.cpp             |  9 ++++++++
 .../sql-functions/aggregate-functions/bitmap.md    |  4 +++-
 .../Data Manipulation/STREAM LOAD.md               |  8 ++++----
 .../sql-functions/aggregate-functions/bitmap_EN.md |  2 ++
 .../Data Manipulation/STREAM LOAD_EN.md            |  8 ++++----
 .../java/org/apache/doris/analysis/InsertStmt.java | 12 +++++++----
 .../java/org/apache/doris/catalog/FunctionSet.java |  2 ++
 .../org/apache/doris/planner/LoadScanNode.java     | 24 ++++++++++++++++++++++
 .../java/org/apache/doris/planner/ScanNode.java    | 22 --------------------
 gensrc/script/doris_builtins_functions.py          |  2 ++
 12 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 6a45fc5..107362a 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -30,6 +30,14 @@ void BitmapFunctions::bitmap_init(FunctionContext* ctx, StringVal* dst) {
     dst->ptr = (uint8_t*)new RoaringBitmap();
 }
 
+// Serializes a default-constructed RoaringBitmap and returns it via from_string_temp.
+StringVal BitmapFunctions::bitmap_empty(FunctionContext* ctx) {
+    RoaringBitmap bitmap;
+    std::string buf(bitmap.size(), '\0');
+    bitmap.serialize(&buf[0]);  // not c_str(): writing through its pointer is undefined
+    return AnyValUtil::from_string_temp(ctx, buf);
+}
+
 template <typename T>
 void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst) {
     if (src.is_null) {
diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h
index 65b866d..a8fa923 100644
--- a/be/src/exprs/bitmap_function.h
+++ b/be/src/exprs/bitmap_function.h
@@ -26,6 +26,7 @@ class BitmapFunctions {
 public:
     static void init();
     static void bitmap_init(FunctionContext* ctx, StringVal* slot);
+    static StringVal bitmap_empty(FunctionContext* ctx);
     template <typename T>
     static void bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst);
     // the input src's ptr need to point a RoaringBitmap, this function will release the
diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp
index 0c0a45a..228a104 100644
--- a/be/test/exprs/bitmap_function_test.cpp
+++ b/be/test/exprs/bitmap_function_test.cpp
@@ -52,6 +52,15 @@ private:
     FunctionContext* ctx;
 };
 
+TEST_F(BitmapFunctionsTest, bitmap_empty) {
+    // Reference value: a default-constructed bitmap run through the test helper.
+    RoaringBitmap empty_bitmap;
+    StringVal want = convert_bitmap_to_string(ctx, empty_bitmap);
+    // bitmap_empty() is expected to compare equal to that reference.
+    StringVal got = BitmapFunctions::bitmap_empty(ctx);
+    ASSERT_EQ(want, got);
+}
+
 TEST_F(BitmapFunctionsTest, to_bitmap) {
     StringVal input = AnyValUtil::from_string_temp(ctx, std::string("1024"));
     StringVal result = BitmapFunctions::to_bitmap(ctx, input);
diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md
index f5a916d..be6a3b0 100644
--- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md
+++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md
@@ -31,6 +31,8 @@ under the License.
 `BITMAP_UNION_INT(expr)` : 计算TINYINT,SMALLINT和INT类型的列中不同值的个数,返回值和
 COUNT(DISTINCT expr)相同
 
+`BITMAP_EMPTY()`: 生成空Bitmap列,用于insert或导入时填充默认值
+
 
 注意:
 
@@ -85,4 +87,4 @@ mysql> select bitmap_count(bitmap_union(id2)) from bitmap_test;
 
 ## keyword
 
-BITMAP,BITMAP_COUNT,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP
+BITMAP,BITMAP_COUNT,BITMAP_EMPTY,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP
diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md
index edb6428..cd39266 100644
--- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md	
+++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md	
@@ -111,14 +111,14 @@ under the License.
     6. 使用streaming方式导入(用户是defalut_cluster中的)
         seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load
 
-    7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用empty_hll补充数据中没有的列
-        curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load
+    7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用hll_empty补充数据中没有的列
+        curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load
 
     8. 导入数据进行严格模式过滤,并设置时区为 Africa/Abidjan
         curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load
 
-    9. 导入含有聚合模型为BITMAP_UNION列的表,可以是表中的列或者数据中的列用于生成BITMAP_UNION列
-        curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load
+    9. 导入含有BITMAP列的表,可以是表中的列或者数据中的列用于生成BITMAP列,也可以使用bitmap_empty填充空的Bitmap
+        curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load
 
  
 ## keyword
diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md
index f711f91..bdfc36a 100644
--- a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md
+++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md
@@ -31,6 +31,8 @@ under the License.
 
 `BITMAP_UNION_INT(expr)` : Calculate the distinct value number of TINYINT,SMALLINT and INT type column. Same as COUNT(DISTINCT expr)
 
+`BITMAP_EMPTY()`: Generates an empty bitmap, used to fill in a default value for a bitmap column during INSERT or data load.
+
 Notice:
 
 	1. TO_BITMAP function only receives TINYINT,SMALLINT,INT.
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md
index 907869c..3d04ca9 100644
--- a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md	
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md	
@@ -168,17 +168,17 @@ Where url is the url given by ErrorURL.
 
     ```Seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load```
 
-7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns,you can also use empty_hll to supplement columns that are not in the data
+7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns; you can also use hll_empty to supplement columns that are not in the data
 
-    ```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
+    ```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
 
 8. load data for strict mode filtering and set the time zone to Africa/Abidjan
 
     ```Curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load```
 
-9. load a table with an aggregate model of `BITMAP_UNION`, either a column in the table or a column in the data to generate a `BITMAP_UNION` column
+9. load a table with BITMAP columns, which can be columns in the table or columns in the data used to generate BITMAP columns; you can also use bitmap_empty to supplement columns that are not in the data
 
-    ```Curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load```
+    ```Curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
 
 
 ## keyword
diff --git a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java
index c823e1f..b456310 100644
--- a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java
+++ b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java
@@ -642,8 +642,9 @@ public class InsertStmt extends DdlStmt {
 
     private void checkBitmapCompatibility(Column col, Expr expr) throws AnalysisException {
         boolean isCompatible = false;
-        final String bitmapMismatchLog = "Column's agg type is bitmap_union,"
-                + " SelectList must contains bitmap_union column, to_bitmap or bitmap_union function's result, column=" + col.getName();
+        final String bitmapMismatchLog = "Column's type is BITMAP,"
+                + " SelectList must contains BITMAP column, to_bitmap or bitmap_union" +
+                " or bitmap_empty function's result, column=" + col.getName();
         if (expr instanceof SlotRef) {
             final SlotRef slot = (SlotRef) expr;
             Column column = slot.getDesc().getColumn();
@@ -660,8 +661,11 @@ public class InsertStmt extends DdlStmt {
             }
         } else if (expr instanceof FunctionCallExpr) {
             final FunctionCallExpr functionExpr = (FunctionCallExpr) expr;
-            if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
-                isCompatible = true; // select id, to_bitmap(id2) from table;
+            // select id, to_bitmap(id2) from table
+            // select id, bitmap_empty from table
+            if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)
+            || functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) {
+                isCompatible = true;
             }
         }
 
diff --git a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 03ca91e..d9964ab 100644
--- a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -513,8 +513,10 @@ public class FunctionSet {
     public static final String BITMAP_UNION = "bitmap_union";
     public static final String BITMAP_UNION_INT = "bitmap_union_int";
     public static final String BITMAP_COUNT = "bitmap_count";
+    public static final String BITMAP_EMPTY = "bitmap_empty";
     public static final String TO_BITMAP = "to_bitmap";
 
+
     private static final Map<Type, String> BITMAP_UNION_INT_SYMBOL =
             ImmutableMap.<Type, String>builder()
                     .put(Type.TINYINT,
diff --git a/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java
index 29a6dc6..4caec0f 100644
--- a/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java
+++ b/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java
@@ -20,10 +20,14 @@ package org.apache.doris.planner;
 import org.apache.doris.analysis.Analyzer;
 import org.apache.doris.analysis.Expr;
 import org.apache.doris.analysis.ExprSubstitutionMap;
+import org.apache.doris.analysis.FunctionCallExpr;
 import org.apache.doris.analysis.SlotDescriptor;
 import org.apache.doris.analysis.SlotRef;
 import org.apache.doris.analysis.TupleDescriptor;
+import org.apache.doris.catalog.AggregateType;
+import org.apache.doris.catalog.FunctionSet;
 import org.apache.doris.catalog.Type;
+import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.UserException;
 
 import com.google.common.collect.Lists;
@@ -72,4 +76,24 @@ public abstract class LoadScanNode extends ScanNode {
         addConjuncts(whereExpr.getConjuncts());
     }
 
+    /**
+     * Rejects a load expression that cannot feed a BITMAP_UNION column: only calls to
+     * to_bitmap or bitmap_empty are accepted. Other aggregation types are not checked.
+     * @throws AnalysisException if the column is BITMAP_UNION and expr is anything else
+     */
+    protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException {
+        if (slotDesc.getColumn().getAggregationType() != AggregateType.BITMAP_UNION) {
+            return;
+        }
+        if (expr instanceof FunctionCallExpr) {
+            String fnName = ((FunctionCallExpr) expr).getFnName().getFunction();
+            if (fnName.equalsIgnoreCase(FunctionSet.TO_BITMAP) || fnName.equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) {
+                return;
+            }
+        }
+        String colName = slotDesc.getColumn().getName();
+        throw new AnalysisException("bitmap column must use to_bitmap or bitmap_empty function, like "
+                + colName + "=to_bitmap(xxx) or " + colName + "=bitmap_empty()");
+    }
+
 }
diff --git a/fe/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/src/main/java/org/apache/doris/planner/ScanNode.java
index 3e9bc86..75f6ab3 100644
--- a/fe/src/main/java/org/apache/doris/planner/ScanNode.java
+++ b/fe/src/main/java/org/apache/doris/planner/ScanNode.java
@@ -18,13 +18,9 @@
 package org.apache.doris.planner;
 
 import org.apache.doris.analysis.Expr;
-import org.apache.doris.analysis.FunctionCallExpr;
 import org.apache.doris.analysis.SlotDescriptor;
 import org.apache.doris.analysis.TupleDescriptor;
-import org.apache.doris.catalog.AggregateType;
-import org.apache.doris.catalog.FunctionSet;
 import org.apache.doris.catalog.PrimitiveType;
-import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.UserException;
 import org.apache.doris.thrift.TNetworkAddress;
 import org.apache.doris.thrift.TScanRangeLocations;
@@ -80,24 +76,6 @@ abstract public class ScanNode extends PlanNode {
         }
     }
 
-    protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException {
-        boolean isCompatible = true;
-        if (slotDesc.getColumn().getAggregationType() == AggregateType.BITMAP_UNION) {
-            if (!(expr instanceof FunctionCallExpr)) {
-                isCompatible = false;
-            } else {
-                FunctionCallExpr fn = (FunctionCallExpr) expr;
-                if (!fn.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
-                    isCompatible = false;
-                }
-            }
-        }
-        if (!isCompatible) {
-            throw new AnalysisException("bitmap_union column must use to_bitmap function, like "
-                    + slotDesc.getColumn().getName() + "=to_bitmap(xxx)");
-        }
-    }
-
     /**
      * Returns all scan ranges plus their locations. Needs to be preceded by a call to
      * finalize().
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index b57add4..d5e1abc 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -601,6 +601,8 @@ visible_functions = [
         '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
     [['bitmap_count'], 'BIGINT', ['VARCHAR'],
         '_ZN5doris15BitmapFunctions12bitmap_countEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
+    [['bitmap_empty'], 'VARCHAR', [],
+        '_ZN5doris15BitmapFunctions12bitmap_emptyEPN9doris_udf15FunctionContextE'],
 
 
     # aes and base64 function


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org