You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2022/10/19 07:40:11 UTC
[doris] branch master updated: [Bugfix](vec) Fix all create mv using to_bitmap() on negative value columns when enable_vectorized_alter_table is true (#13448)
This is an automated email from the ASF dual-hosted git repository.
yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0b368fbbfa [Bugfix](vec) Fix all create mv using to_bitmap() on negative value columns when enable_vectorized_alter_table is true (#13448)
0b368fbbfa is described below
commit 0b368fbbfa2033fd117d69ce9fb2a5ab0df98ee8
Author: Zhengguo Yang <ya...@gmail.com>
AuthorDate: Wed Oct 19 15:40:04 2022 +0800
[Bugfix](vec) Fix all create mv using to_bitmap() on negative value columns when enable_vectorized_alter_table is true (#13448)
* [Bugfix] add negtive value check when create mv using vec
---
be/src/olap/schema_change.cpp | 3 +-
.../vec/functions/function_always_not_nullable.h | 20 ++++++---
be/src/vec/functions/function_bitmap.cpp | 48 ++++++++++++++++++++++
.../doris/analysis/CreateMaterializedViewStmt.java | 44 ++++++++++++++------
.../apache/doris/analysis/FunctionCallExpr.java | 4 ++
.../doris/analysis/MVColumnBitmapUnionPattern.java | 3 +-
.../java/org/apache/doris/catalog/Function.java | 4 ++
.../java/org/apache/doris/catalog/FunctionSet.java | 1 +
.../rewrite/mvrewrite/FunctionCallEqualRule.java | 1 +
.../rewrite/mvrewrite/ToBitmapToSlotRefRule.java | 3 +-
gensrc/script/doris_builtins_functions.py | 6 +++
11 files changed, 117 insertions(+), 20 deletions(-)
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 49c42615b1..3295035f4b 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -619,7 +619,8 @@ Status RowBlockChanger::change_row_block(const RowBlock* ref_block, int32_t data
if (!_schema_mapping[i].materialized_function.empty()) {
bool (*_do_materialized_transform)(RowCursor*, RowCursor*, const TabletColumn&, int,
int, MemPool*) = nullptr;
- if (_schema_mapping[i].materialized_function == "to_bitmap") {
+ if (_schema_mapping[i].materialized_function == "to_bitmap" ||
+ _schema_mapping[i].materialized_function == "to_bitmap_with_check") {
_do_materialized_transform = to_bitmap;
} else if (_schema_mapping[i].materialized_function == "hll_hash") {
_do_materialized_transform = hll_hash;
diff --git a/be/src/vec/functions/function_always_not_nullable.h b/be/src/vec/functions/function_always_not_nullable.h
index e15e1a91e9..d7f437bcdd 100644
--- a/be/src/vec/functions/function_always_not_nullable.h
+++ b/be/src/vec/functions/function_always_not_nullable.h
@@ -23,7 +23,7 @@
namespace doris::vectorized {
-template <typename Function>
+template <typename Function, bool WithReturn = false>
class FunctionAlwaysNotNullable : public IFunction {
public:
static constexpr auto name = Function::name;
@@ -62,8 +62,14 @@ public:
col_nullable->get_null_map_column_ptr().get());
if (col != nullptr && col_nullmap != nullptr) {
- Function::vector_nullable(col->get_chars(), col->get_offsets(),
- col_nullmap->get_data(), column_result);
+ if constexpr (WithReturn) {
+ RETURN_IF_ERROR(Function::vector_nullable(col->get_chars(), col->get_offsets(),
+ col_nullmap->get_data(),
+ column_result));
+ } else {
+ Function::vector_nullable(col->get_chars(), col->get_offsets(),
+ col_nullmap->get_data(), column_result);
+ }
block.replace_by_position(result, std::move(column_result));
return Status::OK();
@@ -71,8 +77,12 @@ public:
return type_error();
}
} else if (const ColumnString* col = check_and_get_column<ColumnString>(column.get())) {
- Function::vector(col->get_chars(), col->get_offsets(), column_result);
-
+ if constexpr (WithReturn) {
+ RETURN_IF_ERROR(
+ Function::vector(col->get_chars(), col->get_offsets(), column_result));
+ } else {
+ Function::vector(col->get_chars(), col->get_offsets(), column_result);
+ }
block.replace_by_position(result, std::move(column_result));
return Status::OK();
} else {
diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp
index 397d70aed5..61e0f99f8c 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -75,6 +75,51 @@ struct ToBitmap {
}
};
+struct ToBitmapWithCheck {
+ static constexpr auto name = "to_bitmap_with_check";
+ using ReturnType = DataTypeBitMap;
+
+ static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+ MutableColumnPtr& col_res) {
+ return execute<false>(data, offsets, nullptr, col_res);
+ }
+
+ static Status vector_nullable(const ColumnString::Chars& data,
+ const ColumnString::Offsets& offsets, const NullMap& nullmap,
+ MutableColumnPtr& col_res) {
+ return execute<true>(data, offsets, &nullmap, col_res);
+ }
+ template <bool arg_is_nullable>
+ static Status execute(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+ const NullMap* nullmap, MutableColumnPtr& col_res) {
+ auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
+ auto& res_data = res_column->get_data();
+ size_t size = offsets.size();
+
+ for (size_t i = 0; i < size; ++i) {
+ if (arg_is_nullable && ((*nullmap)[i])) {
+ continue;
+ } else {
+ const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+ size_t str_size = offsets[i] - offsets[i - 1];
+ StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
+ uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
+ raw_str, str_size, &parse_result);
+ if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
+ res_data[i].add(int_value);
+ } else {
+ LOG(WARNING) << "The input: " << raw_str
+ << " is not valid, to_bitmap only support bigint value from 0 to "
+ "18446744073709551615 currently";
+ return Status::InternalError(
+ "bitmap value must be in [0, 18446744073709551615)");
+ }
+ }
+ }
+ return Status::OK();
+ }
+};
+
struct BitmapFromString {
static constexpr auto name = "bitmap_from_string";
@@ -536,6 +581,8 @@ public:
using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>;
using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
+using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>;
+
using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
@@ -564,6 +611,7 @@ using FunctionBitmapSubsetInRange = FunctionBitmapSubs<BitmapSubsetInRange>;
void register_function_bitmap(SimpleFunctionFactory& factory) {
factory.register_function<FunctionBitmapEmpty>();
factory.register_function<FunctionToBitmap>();
+ factory.register_function<FunctionToBitmapWithCheck>();
factory.register_function<FunctionBitmapFromString>();
factory.register_function<FunctionBitmapHash>();
factory.register_function<FunctionBitmapHash64>();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java
index 28926ac73e..8651a2eee8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java
@@ -64,11 +64,11 @@ public class CreateMaterializedViewStmt extends DdlStmt {
static {
FN_NAME_TO_PATTERN = Maps.newHashMap();
FN_NAME_TO_PATTERN.put(AggregateType.SUM.name().toLowerCase(),
- new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase()));
+ new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase()));
FN_NAME_TO_PATTERN.put(AggregateType.MIN.name().toLowerCase(),
- new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase()));
+ new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase()));
FN_NAME_TO_PATTERN.put(AggregateType.MAX.name().toLowerCase(),
- new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase()));
+ new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase()));
FN_NAME_TO_PATTERN.put(FunctionSet.COUNT, new MVColumnOneChildPattern(FunctionSet.COUNT));
FN_NAME_TO_PATTERN.put(FunctionSet.BITMAP_UNION, new MVColumnBitmapUnionPattern());
FN_NAME_TO_PATTERN.put(FunctionSet.HLL_UNION, new MVColumnHLLUnionPattern());
@@ -147,16 +147,16 @@ public class CreateMaterializedViewStmt extends DdlStmt {
analyzeFromClause();
if (selectStmt.getWhereClause() != null) {
throw new AnalysisException("The where clause is not supported in add materialized view clause, expr:"
- + selectStmt.getWhereClause().toSql());
+ + selectStmt.getWhereClause().toSql());
}
if (selectStmt.getHavingPred() != null) {
throw new AnalysisException("The having clause is not supported in add materialized view clause, expr:"
- + selectStmt.getHavingPred().toSql());
+ + selectStmt.getHavingPred().toSql());
}
analyzeOrderByClause();
if (selectStmt.getLimit() != -1) {
throw new AnalysisException("The limit clause is not supported in add materialized view clause, expr:"
- + " limit " + selectStmt.getLimit());
+ + " limit " + selectStmt.getLimit());
}
}
@@ -182,7 +182,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
Expr selectListItemExpr = selectListItem.getExpr();
if (!(selectListItemExpr instanceof SlotRef) && !(selectListItemExpr instanceof FunctionCallExpr)) {
throw new AnalysisException("The materialized view only support the single column or function expr. "
- + "Error column: " + selectListItemExpr.toSql());
+ + "Error column: " + selectListItemExpr.toSql());
}
if (selectListItemExpr instanceof SlotRef) {
if (meetAggregate) {
@@ -211,6 +211,25 @@ public class CreateMaterializedViewStmt extends DdlStmt {
throw new AnalysisException(
"The function " + functionName + " must match pattern:" + mvColumnPattern.toString());
}
+
+ // for bitmap_union(to_bitmap(column)) function, we should check value is not negative
+ // in vectorized schema_change mode, so we should rewrite the function to
+ // bitmap_union(to_bitmap_with_check(column))
+ if (functionName.equalsIgnoreCase("bitmap_union")) {
+ if (functionCallExpr.getChildren().size() == 1
+ && functionCallExpr.getChild(0) instanceof FunctionCallExpr) {
+ Expr child = functionCallExpr.getChild(0);
+ FunctionCallExpr childFunctionCallExpr = (FunctionCallExpr) child;
+ if (childFunctionCallExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) {
+ childFunctionCallExpr.setFnName(
+ new FunctionName(childFunctionCallExpr.getFnName().getDb(),
+ "to_bitmap_with_check"));
+ childFunctionCallExpr.getFn().setName(
+ new FunctionName(childFunctionCallExpr.getFn().getFunctionName().getDb(),
+ "to_bitmap_with_check"));
+ }
+ }
+ }
}
// check duplicate column
List<SlotRef> slots = new ArrayList<>();
@@ -256,7 +275,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
List<OrderByElement> orderByElements = selectStmt.getOrderByElements();
if (orderByElements.size() > mvColumnItemList.size()) {
throw new AnalysisException("The number of columns in order clause must be less than " + "the number of "
- + "columns in select clause");
+ + "columns in select clause");
}
if (beginIndexOfAggregation != -1 && (orderByElements.size() != (beginIndexOfAggregation))) {
throw new AnalysisException("The key of columns in mv must be all of group by columns");
@@ -265,13 +284,13 @@ public class CreateMaterializedViewStmt extends DdlStmt {
Expr orderByElement = orderByElements.get(i).getExpr();
if (!(orderByElement instanceof SlotRef)) {
throw new AnalysisException("The column in order clause must be original column without calculation. "
- + "Error column: " + orderByElement.toSql());
+ + "Error column: " + orderByElement.toSql());
}
MVColumnItem mvColumnItem = mvColumnItemList.get(i);
SlotRef slotRef = (SlotRef) orderByElement;
if (!mvColumnItem.getName().equalsIgnoreCase(slotRef.getColumnName())) {
throw new AnalysisException("The order of columns in order by clause must be same as "
- + "the order of columns in select list");
+ + "the order of columns in select list");
}
Preconditions.checkState(mvColumnItem.getAggregationType() == null);
mvColumnItem.setIsKey(true);
@@ -451,7 +470,8 @@ public class CreateMaterializedViewStmt extends DdlStmt {
CastExpr castExpr = new CastExpr(new TypeDef(Type.VARCHAR), baseSlotRef);
List<Expr> params = Lists.newArrayList();
params.add(castExpr);
- FunctionCallExpr defineExpr = new FunctionCallExpr(FunctionSet.TO_BITMAP, params);
+ FunctionCallExpr defineExpr =
+ new FunctionCallExpr(FunctionSet.TO_BITMAP_WITH_CHECK, params);
result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr);
} else {
result.put(baseColumnName, null);
@@ -471,7 +491,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
case FunctionSet.COUNT:
Expr defineExpr = new CaseExpr(null, Lists.newArrayList(
new CaseWhenClause(new IsNullPredicate(slots.get(0), false),
- new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT));
+ new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT));
result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr);
break;
default:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index bef48ed4a3..6ff3404129 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -128,6 +128,10 @@ public class FunctionCallExpr extends Expr {
isTableFnCall = tableFnCall;
}
+ public void setFnName(FunctionName fnName) {
+ this.fnName = fnName;
+ }
+
public Function getFn() {
return fn;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java
index 8dabace3a1..25159fb252 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java
@@ -44,7 +44,8 @@ public class MVColumnBitmapUnionPattern implements MVColumnPattern {
}
} else if (fnExpr.getChild(0) instanceof FunctionCallExpr) {
FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0);
- if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
+ if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)
+ && !child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP_WITH_CHECK)) {
return false;
}
SlotRef slotRef = child0FnExpr.getChild(0).unwrapSlotRef();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java
index f1fd3c0f06..c99cc87c59 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java
@@ -201,6 +201,10 @@ public class Function implements Writable {
location = loc;
}
+ public void setName(FunctionName name) {
+ this.name = name;
+ }
+
public TFunctionBinaryType getBinaryType() {
return binaryType;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index a2d5ba356a..eab49faa5e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -911,6 +911,7 @@ public class FunctionSet<T> {
.build();
public static final String TO_BITMAP = "to_bitmap";
+ public static final String TO_BITMAP_WITH_CHECK = "to_bitmap_with_check";
public static final String BITMAP_UNION = "bitmap_union";
public static final String BITMAP_UNION_COUNT = "bitmap_union_count";
public static final String BITMAP_UNION_INT = "bitmap_union_int";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java
index aee0871851..4e9005f339 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java
@@ -41,6 +41,7 @@ public class FunctionCallEqualRule implements MVExprEqualRule {
builder.put(FunctionSet.HLL_UNION, "hll_union");
builder.put(FunctionSet.HLL_UNION, "hll_raw_agg");
builder.put(FunctionSet.TO_BITMAP, FunctionSet.TO_BITMAP);
+ builder.put(FunctionSet.TO_BITMAP_WITH_CHECK, FunctionSet.TO_BITMAP_WITH_CHECK);
builder.put(FunctionSet.HLL_HASH, FunctionSet.HLL_HASH);
columnAggTypeMatchFnName = builder.build();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java
index c97f069cf8..a486c6185f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java
@@ -65,7 +65,8 @@ public class ToBitmapToSlotRefRule implements ExprRewriteRule {
return expr;
}
FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0);
- if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) {
+ if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")
+ && !child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap_with_check")) {
return expr;
}
if (child0FnExpr.getChild(0) instanceof SlotRef) {
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 621bcb3764..ed71c714fc 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2421,6 +2421,9 @@ visible_functions = [
[['to_bitmap'], 'BITMAP', ['VARCHAR'],
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
+ [['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'],
+ '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
+ '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash'], 'BITMAP', ['VARCHAR'],
'_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
@@ -2430,6 +2433,9 @@ visible_functions = [
[['to_bitmap'], 'BITMAP', ['STRING'],
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
+ [['to_bitmap_with_check'], 'BITMAP', ['STRING'],
+ '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
+ '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash'], 'BITMAP', ['STRING'],
'_ZN5doris15BitmapFunctions11bitmap_hash64EPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org