You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2022/10/19 07:40:11 UTC

[doris] branch master updated: [Bugfix](vec) Fix all create mv using to_bitmap() on negative value columns when enable_vectorized_alter_table is true (#13448)

This is an automated email from the ASF dual-hosted git repository.

yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0b368fbbfa [Bugfix](vec) Fix all create mv using to_bitmap() on negative value columns when enable_vectorized_alter_table is true (#13448)
0b368fbbfa is described below

commit 0b368fbbfa2033fd117d69ce9fb2a5ab0df98ee8
Author: Zhengguo Yang <ya...@gmail.com>
AuthorDate: Wed Oct 19 15:40:04 2022 +0800

    [Bugfix](vec) Fix all create mv using to_bitmap() on negative value columns when enable_vectorized_alter_table is true (#13448)
    
    * [Bugfix] add negtive value check when create mv using vec
---
 be/src/olap/schema_change.cpp                      |  3 +-
 .../vec/functions/function_always_not_nullable.h   | 20 ++++++---
 be/src/vec/functions/function_bitmap.cpp           | 48 ++++++++++++++++++++++
 .../doris/analysis/CreateMaterializedViewStmt.java | 44 ++++++++++++++------
 .../apache/doris/analysis/FunctionCallExpr.java    |  4 ++
 .../doris/analysis/MVColumnBitmapUnionPattern.java |  3 +-
 .../java/org/apache/doris/catalog/Function.java    |  4 ++
 .../java/org/apache/doris/catalog/FunctionSet.java |  1 +
 .../rewrite/mvrewrite/FunctionCallEqualRule.java   |  1 +
 .../rewrite/mvrewrite/ToBitmapToSlotRefRule.java   |  3 +-
 gensrc/script/doris_builtins_functions.py          |  6 +++
 11 files changed, 117 insertions(+), 20 deletions(-)

diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 49c42615b1..3295035f4b 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -619,7 +619,8 @@ Status RowBlockChanger::change_row_block(const RowBlock* ref_block, int32_t data
             if (!_schema_mapping[i].materialized_function.empty()) {
                 bool (*_do_materialized_transform)(RowCursor*, RowCursor*, const TabletColumn&, int,
                                                    int, MemPool*) = nullptr;
-                if (_schema_mapping[i].materialized_function == "to_bitmap") {
+                if (_schema_mapping[i].materialized_function == "to_bitmap" ||
+                    _schema_mapping[i].materialized_function == "to_bitmap_with_check") {
                     _do_materialized_transform = to_bitmap;
                 } else if (_schema_mapping[i].materialized_function == "hll_hash") {
                     _do_materialized_transform = hll_hash;
diff --git a/be/src/vec/functions/function_always_not_nullable.h b/be/src/vec/functions/function_always_not_nullable.h
index e15e1a91e9..d7f437bcdd 100644
--- a/be/src/vec/functions/function_always_not_nullable.h
+++ b/be/src/vec/functions/function_always_not_nullable.h
@@ -23,7 +23,7 @@
 
 namespace doris::vectorized {
 
-template <typename Function>
+template <typename Function, bool WithReturn = false>
 class FunctionAlwaysNotNullable : public IFunction {
 public:
     static constexpr auto name = Function::name;
@@ -62,8 +62,14 @@ public:
                     col_nullable->get_null_map_column_ptr().get());
 
             if (col != nullptr && col_nullmap != nullptr) {
-                Function::vector_nullable(col->get_chars(), col->get_offsets(),
-                                          col_nullmap->get_data(), column_result);
+                if constexpr (WithReturn) {
+                    RETURN_IF_ERROR(Function::vector_nullable(col->get_chars(), col->get_offsets(),
+                                                              col_nullmap->get_data(),
+                                                              column_result));
+                } else {
+                    Function::vector_nullable(col->get_chars(), col->get_offsets(),
+                                              col_nullmap->get_data(), column_result);
+                }
 
                 block.replace_by_position(result, std::move(column_result));
                 return Status::OK();
@@ -71,8 +77,12 @@ public:
                 return type_error();
             }
         } else if (const ColumnString* col = check_and_get_column<ColumnString>(column.get())) {
-            Function::vector(col->get_chars(), col->get_offsets(), column_result);
-
+            if constexpr (WithReturn) {
+                RETURN_IF_ERROR(
+                        Function::vector(col->get_chars(), col->get_offsets(), column_result));
+            } else {
+                Function::vector(col->get_chars(), col->get_offsets(), column_result);
+            }
             block.replace_by_position(result, std::move(column_result));
             return Status::OK();
         } else {
diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp
index 397d70aed5..61e0f99f8c 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -75,6 +75,51 @@ struct ToBitmap {
     }
 };
 
+struct ToBitmapWithCheck {
+    static constexpr auto name = "to_bitmap_with_check";
+    using ReturnType = DataTypeBitMap;
+
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         MutableColumnPtr& col_res) {
+        return execute<false>(data, offsets, nullptr, col_res);
+    }
+
+    static Status vector_nullable(const ColumnString::Chars& data,
+                                  const ColumnString::Offsets& offsets, const NullMap& nullmap,
+                                  MutableColumnPtr& col_res) {
+        return execute<true>(data, offsets, &nullmap, col_res);
+    }
+    template <bool arg_is_nullable>
+    static Status execute(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                          const NullMap* nullmap, MutableColumnPtr& col_res) {
+        auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
+        auto& res_data = res_column->get_data();
+        size_t size = offsets.size();
+
+        for (size_t i = 0; i < size; ++i) {
+            if (arg_is_nullable && ((*nullmap)[i])) {
+                continue;
+            } else {
+                const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+                size_t str_size = offsets[i] - offsets[i - 1];
+                StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
+                uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
+                        raw_str, str_size, &parse_result);
+                if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
+                    res_data[i].add(int_value);
+                } else {
+                    LOG(WARNING) << "The input: " << raw_str
+                                 << " is not valid, to_bitmap only support bigint value from 0 to "
+                                    "18446744073709551615 currently";
+                    return Status::InternalError(
+                            "bitmap value must be in [0, 18446744073709551615)");
+                }
+            }
+        }
+        return Status::OK();
+    }
+};
+
 struct BitmapFromString {
     static constexpr auto name = "bitmap_from_string";
 
@@ -536,6 +581,8 @@ public:
 
 using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>;
 using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
+using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>;
+
 using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
 using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
 using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
@@ -564,6 +611,7 @@ using FunctionBitmapSubsetInRange = FunctionBitmapSubs<BitmapSubsetInRange>;
 void register_function_bitmap(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionBitmapEmpty>();
     factory.register_function<FunctionToBitmap>();
+    factory.register_function<FunctionToBitmapWithCheck>();
     factory.register_function<FunctionBitmapFromString>();
     factory.register_function<FunctionBitmapHash>();
     factory.register_function<FunctionBitmapHash64>();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java
index 28926ac73e..8651a2eee8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java
@@ -64,11 +64,11 @@ public class CreateMaterializedViewStmt extends DdlStmt {
     static {
         FN_NAME_TO_PATTERN = Maps.newHashMap();
         FN_NAME_TO_PATTERN.put(AggregateType.SUM.name().toLowerCase(),
-                               new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase()));
+                new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase()));
         FN_NAME_TO_PATTERN.put(AggregateType.MIN.name().toLowerCase(),
-                               new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase()));
+                new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase()));
         FN_NAME_TO_PATTERN.put(AggregateType.MAX.name().toLowerCase(),
-                               new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase()));
+                new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase()));
         FN_NAME_TO_PATTERN.put(FunctionSet.COUNT, new MVColumnOneChildPattern(FunctionSet.COUNT));
         FN_NAME_TO_PATTERN.put(FunctionSet.BITMAP_UNION, new MVColumnBitmapUnionPattern());
         FN_NAME_TO_PATTERN.put(FunctionSet.HLL_UNION, new MVColumnHLLUnionPattern());
@@ -147,16 +147,16 @@ public class CreateMaterializedViewStmt extends DdlStmt {
         analyzeFromClause();
         if (selectStmt.getWhereClause() != null) {
             throw new AnalysisException("The where clause is not supported in add materialized view clause, expr:"
-                                                + selectStmt.getWhereClause().toSql());
+                    + selectStmt.getWhereClause().toSql());
         }
         if (selectStmt.getHavingPred() != null) {
             throw new AnalysisException("The having clause is not supported in add materialized view clause, expr:"
-                                                + selectStmt.getHavingPred().toSql());
+                    + selectStmt.getHavingPred().toSql());
         }
         analyzeOrderByClause();
         if (selectStmt.getLimit() != -1) {
             throw new AnalysisException("The limit clause is not supported in add materialized view clause, expr:"
-                                                + " limit " + selectStmt.getLimit());
+                    + " limit " + selectStmt.getLimit());
         }
     }
 
@@ -182,7 +182,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
             Expr selectListItemExpr = selectListItem.getExpr();
             if (!(selectListItemExpr instanceof SlotRef) && !(selectListItemExpr instanceof FunctionCallExpr)) {
                 throw new AnalysisException("The materialized view only support the single column or function expr. "
-                                                    + "Error column: " + selectListItemExpr.toSql());
+                        + "Error column: " + selectListItemExpr.toSql());
             }
             if (selectListItemExpr instanceof SlotRef) {
                 if (meetAggregate) {
@@ -211,6 +211,25 @@ public class CreateMaterializedViewStmt extends DdlStmt {
                         throw new AnalysisException(
                                 "The function " + functionName + " must match pattern:" + mvColumnPattern.toString());
                     }
+
+                    // for bitmap_union(to_bitmap(column)) function, we should check value is not negative
+                    // in vectorized schema_change mode, so we should rewrite the function to
+                    // bitmap_union(to_bitmap_with_check(column))
+                    if (functionName.equalsIgnoreCase("bitmap_union")) {
+                        if (functionCallExpr.getChildren().size() == 1
+                                && functionCallExpr.getChild(0) instanceof FunctionCallExpr) {
+                            Expr child = functionCallExpr.getChild(0);
+                            FunctionCallExpr childFunctionCallExpr = (FunctionCallExpr) child;
+                            if (childFunctionCallExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) {
+                                childFunctionCallExpr.setFnName(
+                                        new FunctionName(childFunctionCallExpr.getFnName().getDb(),
+                                                "to_bitmap_with_check"));
+                                childFunctionCallExpr.getFn().setName(
+                                        new FunctionName(childFunctionCallExpr.getFn().getFunctionName().getDb(),
+                                                "to_bitmap_with_check"));
+                            }
+                        }
+                    }
                 }
                 // check duplicate column
                 List<SlotRef> slots = new ArrayList<>();
@@ -256,7 +275,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
         List<OrderByElement> orderByElements = selectStmt.getOrderByElements();
         if (orderByElements.size() > mvColumnItemList.size()) {
             throw new AnalysisException("The number of columns in order clause must be less than " + "the number of "
-                                                + "columns in select clause");
+                    + "columns in select clause");
         }
         if (beginIndexOfAggregation != -1 && (orderByElements.size() != (beginIndexOfAggregation))) {
             throw new AnalysisException("The key of columns in mv must be all of group by columns");
@@ -265,13 +284,13 @@ public class CreateMaterializedViewStmt extends DdlStmt {
             Expr orderByElement = orderByElements.get(i).getExpr();
             if (!(orderByElement instanceof SlotRef)) {
                 throw new AnalysisException("The column in order clause must be original column without calculation. "
-                                                    + "Error column: " + orderByElement.toSql());
+                        + "Error column: " + orderByElement.toSql());
             }
             MVColumnItem mvColumnItem = mvColumnItemList.get(i);
             SlotRef slotRef = (SlotRef) orderByElement;
             if (!mvColumnItem.getName().equalsIgnoreCase(slotRef.getColumnName())) {
                 throw new AnalysisException("The order of columns in order by clause must be same as "
-                                                    + "the order of columns in select list");
+                        + "the order of columns in select list");
             }
             Preconditions.checkState(mvColumnItem.getAggregationType() == null);
             mvColumnItem.setIsKey(true);
@@ -451,7 +470,8 @@ public class CreateMaterializedViewStmt extends DdlStmt {
                             CastExpr castExpr = new CastExpr(new TypeDef(Type.VARCHAR), baseSlotRef);
                             List<Expr> params = Lists.newArrayList();
                             params.add(castExpr);
-                            FunctionCallExpr defineExpr = new FunctionCallExpr(FunctionSet.TO_BITMAP, params);
+                            FunctionCallExpr defineExpr =
+                                    new FunctionCallExpr(FunctionSet.TO_BITMAP_WITH_CHECK, params);
                             result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr);
                         } else {
                             result.put(baseColumnName, null);
@@ -471,7 +491,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
                     case FunctionSet.COUNT:
                         Expr defineExpr = new CaseExpr(null, Lists.newArrayList(
                                 new CaseWhenClause(new IsNullPredicate(slots.get(0), false),
-                                                   new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT));
+                                        new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT));
                         result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr);
                         break;
                     default:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index bef48ed4a3..6ff3404129 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -128,6 +128,10 @@ public class FunctionCallExpr extends Expr {
         isTableFnCall = tableFnCall;
     }
 
+    public void setFnName(FunctionName fnName) {
+        this.fnName = fnName;
+    }
+
     public Function getFn() {
         return fn;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java
index 8dabace3a1..25159fb252 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java
@@ -44,7 +44,8 @@ public class MVColumnBitmapUnionPattern implements MVColumnPattern {
             }
         } else if (fnExpr.getChild(0) instanceof FunctionCallExpr) {
             FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0);
-            if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
+            if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)
+                    && !child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP_WITH_CHECK)) {
                 return false;
             }
             SlotRef slotRef = child0FnExpr.getChild(0).unwrapSlotRef();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java
index f1fd3c0f06..c99cc87c59 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java
@@ -201,6 +201,10 @@ public class Function implements Writable {
         location = loc;
     }
 
+    public void setName(FunctionName name) {
+        this.name = name;
+    }
+
     public TFunctionBinaryType getBinaryType() {
         return binaryType;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index a2d5ba356a..eab49faa5e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -911,6 +911,7 @@ public class FunctionSet<T> {
                     .build();
 
     public static final String TO_BITMAP = "to_bitmap";
+    public static final String TO_BITMAP_WITH_CHECK = "to_bitmap_with_check";
     public static final String BITMAP_UNION = "bitmap_union";
     public static final String BITMAP_UNION_COUNT = "bitmap_union_count";
     public static final String BITMAP_UNION_INT = "bitmap_union_int";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java
index aee0871851..4e9005f339 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java
@@ -41,6 +41,7 @@ public class FunctionCallEqualRule implements MVExprEqualRule {
         builder.put(FunctionSet.HLL_UNION, "hll_union");
         builder.put(FunctionSet.HLL_UNION, "hll_raw_agg");
         builder.put(FunctionSet.TO_BITMAP, FunctionSet.TO_BITMAP);
+        builder.put(FunctionSet.TO_BITMAP_WITH_CHECK, FunctionSet.TO_BITMAP_WITH_CHECK);
         builder.put(FunctionSet.HLL_HASH, FunctionSet.HLL_HASH);
         columnAggTypeMatchFnName = builder.build();
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java
index c97f069cf8..a486c6185f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java
@@ -65,7 +65,8 @@ public class ToBitmapToSlotRefRule implements ExprRewriteRule {
             return expr;
         }
         FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0);
-        if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) {
+        if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")
+                && !child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap_with_check")) {
             return expr;
         }
         if (child0FnExpr.getChild(0) instanceof SlotRef) {
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 621bcb3764..ed71c714fc 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2421,6 +2421,9 @@ visible_functions = [
     [['to_bitmap'], 'BITMAP', ['VARCHAR'],
         '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
         '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
+    [['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'],
+        '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
+        '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['bitmap_hash'], 'BITMAP', ['VARCHAR'],
         '_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE',
         '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
@@ -2430,6 +2433,9 @@ visible_functions = [
     [['to_bitmap'], 'BITMAP', ['STRING'],
         '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
         '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
+    [['to_bitmap_with_check'], 'BITMAP', ['STRING'],
+        '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
+        '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['bitmap_hash'], 'BITMAP', ['STRING'],
         '_ZN5doris15BitmapFunctions11bitmap_hash64EPN9doris_udf15FunctionContextERKNS1_9StringValE',
         '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org