You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2019/11/18 12:37:07 UTC
[incubator-doris] branch master updated: Support bitmap_empty function (#2227)
This is an automated email from the ASF dual-hosted git repository.
zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new d8cfbbe Support bitmap_empty function (#2227)
d8cfbbe is described below
commit d8cfbbedf740720d4755eb4489902cc240dcdd5f
Author: kangkaisen <ka...@apache.org>
AuthorDate: Mon Nov 18 20:37:00 2019 +0800
Support bitmap_empty function (#2227)
---
be/src/exprs/bitmap_function.cpp | 8 ++++++++
be/src/exprs/bitmap_function.h | 1 +
be/test/exprs/bitmap_function_test.cpp | 9 ++++++++
.../sql-functions/aggregate-functions/bitmap.md | 4 +++-
.../Data Manipulation/STREAM LOAD.md | 8 ++++----
.../sql-functions/aggregate-functions/bitmap_EN.md | 2 ++
.../Data Manipulation/STREAM LOAD_EN.md | 8 ++++----
.../java/org/apache/doris/analysis/InsertStmt.java | 12 +++++++----
.../java/org/apache/doris/catalog/FunctionSet.java | 2 ++
.../org/apache/doris/planner/LoadScanNode.java | 24 ++++++++++++++++++++++
.../java/org/apache/doris/planner/ScanNode.java | 22 --------------------
gensrc/script/doris_builtins_functions.py | 2 ++
12 files changed, 67 insertions(+), 35 deletions(-)
diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 6a45fc5..107362a 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -30,6 +30,14 @@ void BitmapFunctions::bitmap_init(FunctionContext* ctx, StringVal* dst) {
dst->ptr = (uint8_t*)new RoaringBitmap();
}
+// Builds the serialized form of a default-constructed (empty) RoaringBitmap
+// and returns it as a StringVal created through AnyValUtil::from_string_temp.
+// Registered as the zero-argument SQL function bitmap_empty().
+StringVal BitmapFunctions::bitmap_empty(FunctionContext* ctx) {
+    RoaringBitmap bitmap;
+    // bitmap.size() is used as the byte length of the serialization buffer.
+    std::string buf;
+    buf.resize(bitmap.size());
+    // Write through &buf[0] rather than (char*)buf.c_str(): modifying the
+    // array obtained from c_str() is undefined behavior.
+    bitmap.serialize(&buf[0]);
+    return AnyValUtil::from_string_temp(ctx, buf);
+}
+
template <typename T>
void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst) {
if (src.is_null) {
diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h
index 65b866d..a8fa923 100644
--- a/be/src/exprs/bitmap_function.h
+++ b/be/src/exprs/bitmap_function.h
@@ -26,6 +26,7 @@ class BitmapFunctions {
public:
static void init();
static void bitmap_init(FunctionContext* ctx, StringVal* slot);
+ static StringVal bitmap_empty(FunctionContext* ctx);
template <typename T>
static void bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst);
// the input src's ptr need to point a RoaringBitmap, this function will release the
diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp
index 0c0a45a..228a104 100644
--- a/be/test/exprs/bitmap_function_test.cpp
+++ b/be/test/exprs/bitmap_function_test.cpp
@@ -52,6 +52,15 @@ private:
FunctionContext* ctx;
};
+// bitmap_empty() must produce the same bytes as serializing a
+// default-constructed RoaringBitmap with the test helper.
+TEST_F(BitmapFunctionsTest, bitmap_empty) {
+    // Reference value built independently of the function under test.
+    RoaringBitmap empty_bitmap;
+    StringVal expected = convert_bitmap_to_string(ctx, empty_bitmap);
+
+    StringVal actual = BitmapFunctions::bitmap_empty(ctx);
+
+    ASSERT_EQ(expected, actual);
+}
+
TEST_F(BitmapFunctionsTest, to_bitmap) {
StringVal input = AnyValUtil::from_string_temp(ctx, std::string("1024"));
StringVal result = BitmapFunctions::to_bitmap(ctx, input);
diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md
index f5a916d..be6a3b0 100644
--- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md
+++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md
@@ -31,6 +31,8 @@ under the License.
`BITMAP_UNION_INT(expr)` : 计算TINYINT,SMALLINT和INT类型的列中不同值的个数,返回值和
COUNT(DISTINCT expr)相同
+`BITMAP_EMPTY()`: 生成空Bitmap列,用于insert或导入时填充默认值
+
注意:
@@ -85,4 +87,4 @@ mysql> select bitmap_count(bitmap_union(id2)) from bitmap_test;
## keyword
-BITMAP,BITMAP_COUNT,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP
+BITMAP,BITMAP_COUNT,BITMAP_EMPTY,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP
diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md
index edb6428..cd39266 100644
--- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md
+++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md
@@ -111,14 +111,14 @@ under the License.
6. 使用streaming方式导入(用户是defalut_cluster中的)
seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load
- 7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用empty_hll补充数据中没有的列
- curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load
+ 7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用hll_empty补充数据中没有的列
+ curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load
8. 导入数据进行严格模式过滤,并设置时区为 Africa/Abidjan
curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load
- 9. 导入含有聚合模型为BITMAP_UNION列的表,可以是表中的列或者数据中的列用于生成BITMAP_UNION列
- curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load
+ 9. 导入含有BITMAP列的表,可以是表中的列或者数据中的列用于生成BITMAP列,也可以使用bitmap_empty填充空的Bitmap
+ curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load
## keyword
diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md
index f711f91..bdfc36a 100644
--- a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md
+++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md
@@ -31,6 +31,8 @@ under the License.
`BITMAP_UNION_INT(expr)` : Calculate the distinct value number of TINYINT,SMALLINT and INT type column. Same as COUNT(DISTINCT expr)
+`BITMAP_EMPTY()`: Generate an empty bitmap column, used to fill in a default value during INSERT INTO or data loading.
+
Notice:
1. TO_BITMAP function only receives TINYINT,SMALLINT,INT.
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md
index 907869c..3d04ca9 100644
--- a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md
+++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md
@@ -168,17 +168,17 @@ Where url is the url given by ErrorURL.
```Seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load```
-7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns,you can also use empty_hll to supplement columns that are not in the data
+7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns; you can also use hll_empty to supplement columns that are not in the data
- ```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
+ ```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
8. load data for strict mode filtering and set the time zone to Africa/Abidjan
```Curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load```
-9. load a table with an aggregate model of `BITMAP_UNION`, either a column in the table or a column in the data to generate a `BITMAP_UNION` column
+9. load a table with BITMAP columns, which can be columns in the table or columns in the data used to generate BITMAP columns; you can also use bitmap_empty to supplement columns that are not in the data
- ```Curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load```
+ ```Curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
## keyword
diff --git a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java
index c823e1f..b456310 100644
--- a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java
+++ b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java
@@ -642,8 +642,9 @@ public class InsertStmt extends DdlStmt {
private void checkBitmapCompatibility(Column col, Expr expr) throws AnalysisException {
boolean isCompatible = false;
- final String bitmapMismatchLog = "Column's agg type is bitmap_union,"
- + " SelectList must contains bitmap_union column, to_bitmap or bitmap_union function's result, column=" + col.getName();
+ final String bitmapMismatchLog = "Column's type is BITMAP,"
+ + " SelectList must contains BITMAP column, to_bitmap or bitmap_union" +
+ " or bitmap_empty function's result, column=" + col.getName();
if (expr instanceof SlotRef) {
final SlotRef slot = (SlotRef) expr;
Column column = slot.getDesc().getColumn();
@@ -660,8 +661,11 @@ public class InsertStmt extends DdlStmt {
}
} else if (expr instanceof FunctionCallExpr) {
final FunctionCallExpr functionExpr = (FunctionCallExpr) expr;
- if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
- isCompatible = true; // select id, to_bitmap(id2) from table;
+ // select id, to_bitmap(id2) from table
+ // select id, bitmap_empty from table
+ if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)
+ || functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) {
+ isCompatible = true;
}
}
diff --git a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 03ca91e..d9964ab 100644
--- a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -513,8 +513,10 @@ public class FunctionSet {
public static final String BITMAP_UNION = "bitmap_union";
public static final String BITMAP_UNION_INT = "bitmap_union_int";
public static final String BITMAP_COUNT = "bitmap_count";
+ public static final String BITMAP_EMPTY = "bitmap_empty";
public static final String TO_BITMAP = "to_bitmap";
+
private static final Map<Type, String> BITMAP_UNION_INT_SYMBOL =
ImmutableMap.<Type, String>builder()
.put(Type.TINYINT,
diff --git a/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java
index 29a6dc6..4caec0f 100644
--- a/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java
+++ b/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java
@@ -20,10 +20,14 @@ package org.apache.doris.planner;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.ExprSubstitutionMap;
+import org.apache.doris.analysis.FunctionCallExpr;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.SlotRef;
import org.apache.doris.analysis.TupleDescriptor;
+import org.apache.doris.catalog.AggregateType;
+import org.apache.doris.catalog.FunctionSet;
import org.apache.doris.catalog.Type;
+import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.UserException;
import com.google.common.collect.Lists;
@@ -72,4 +76,24 @@ public abstract class LoadScanNode extends ScanNode {
addConjuncts(whereExpr.getConjuncts());
}
+    /**
+     * Verifies that the expression feeding a BITMAP_UNION column is one of the
+     * bitmap-producing functions accepted during load.
+     *
+     * Columns whose aggregation type is not BITMAP_UNION are always accepted.
+     * For a BITMAP_UNION column, expr must be a function call named to_bitmap
+     * or bitmap_empty (names are compared case-insensitively).
+     *
+     * @param slotDesc slot descriptor of the destination column
+     * @param expr     expression that produces the value for that column
+     * @throws AnalysisException if a BITMAP_UNION column is fed by any other expression
+     */
+    protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException {
+        if (slotDesc.getColumn().getAggregationType() != AggregateType.BITMAP_UNION) {
+            return;
+        }
+        if (expr instanceof FunctionCallExpr) {
+            String fnName = ((FunctionCallExpr) expr).getFnName().getFunction();
+            if (fnName.equalsIgnoreCase(FunctionSet.TO_BITMAP)
+                    || fnName.equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) {
+                return;
+            }
+        }
+        String colName = slotDesc.getColumn().getName();
+        // The message names the real function (bitmap_empty, not empty_bitmap)
+        // and separates the two usage examples so they do not run together.
+        throw new AnalysisException("bitmap column must use to_bitmap or bitmap_empty function, like "
+                + colName + "=to_bitmap(xxx) or " + colName + "=bitmap_empty()");
+    }
+
}
diff --git a/fe/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/src/main/java/org/apache/doris/planner/ScanNode.java
index 3e9bc86..75f6ab3 100644
--- a/fe/src/main/java/org/apache/doris/planner/ScanNode.java
+++ b/fe/src/main/java/org/apache/doris/planner/ScanNode.java
@@ -18,13 +18,9 @@
package org.apache.doris.planner;
import org.apache.doris.analysis.Expr;
-import org.apache.doris.analysis.FunctionCallExpr;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.TupleDescriptor;
-import org.apache.doris.catalog.AggregateType;
-import org.apache.doris.catalog.FunctionSet;
import org.apache.doris.catalog.PrimitiveType;
-import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.UserException;
import org.apache.doris.thrift.TNetworkAddress;
import org.apache.doris.thrift.TScanRangeLocations;
@@ -80,24 +76,6 @@ abstract public class ScanNode extends PlanNode {
}
}
- protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException {
- boolean isCompatible = true;
- if (slotDesc.getColumn().getAggregationType() == AggregateType.BITMAP_UNION) {
- if (!(expr instanceof FunctionCallExpr)) {
- isCompatible = false;
- } else {
- FunctionCallExpr fn = (FunctionCallExpr) expr;
- if (!fn.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
- isCompatible = false;
- }
- }
- }
- if (!isCompatible) {
- throw new AnalysisException("bitmap_union column must use to_bitmap function, like "
- + slotDesc.getColumn().getName() + "=to_bitmap(xxx)");
- }
- }
-
/**
* Returns all scan ranges plus their locations. Needs to be preceded by a call to
* finalize().
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index b57add4..d5e1abc 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -601,6 +601,8 @@ visible_functions = [
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
[['bitmap_count'], 'BIGINT', ['VARCHAR'],
'_ZN5doris15BitmapFunctions12bitmap_countEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
+ [['bitmap_empty'], 'VARCHAR', [],
+ '_ZN5doris15BitmapFunctions12bitmap_emptyEPN9doris_udf15FunctionContextE'],
# aes and base64 function
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org