You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/10/21 02:15:27 UTC

[doris] branch branch-1.1-lts updated: [fix](cherry-pick)pick some code from master to fix bugs (#13517)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
     new 401fe8bc9a [fix](cherry-pick)pick some code from master to fix bugs (#13517)
401fe8bc9a is described below

commit 401fe8bc9aa0a25360ba34fd7d7d86f32a98de8c
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Fri Oct 21 10:15:21 2022 +0800

    [fix](cherry-pick)pick some code from master to fix bugs (#13517)
---
 be/src/vec/exec/vsort_node.cpp                     | 21 ++++-
 be/src/vec/exec/vsort_node.h                       |  4 +
 be/src/vec/functions/in.cpp                        | 97 ++++++++++++++++++----
 .../java/org/apache/doris/analysis/SelectStmt.java | 12 +++
 .../java/org/apache/doris/planner/SortNode.java    | 11 +++
 gensrc/thrift/PlanNodes.thrift                     |  2 +
 6 files changed, 130 insertions(+), 17 deletions(-)

diff --git a/be/src/vec/exec/vsort_node.cpp b/be/src/vec/exec/vsort_node.cpp
index 99c8090fde..c494d8231d 100644
--- a/be/src/vec/exec/vsort_node.cpp
+++ b/be/src/vec/exec/vsort_node.cpp
@@ -36,6 +36,16 @@ Status VSortNode::init(const TPlanNode& tnode, RuntimeState* state) {
     RETURN_IF_ERROR(_vsort_exec_exprs.init(tnode.sort_node.sort_info, _pool));
     _is_asc_order = tnode.sort_node.sort_info.is_asc_order;
     _nulls_first = tnode.sort_node.sort_info.nulls_first;
+    if (tnode.sort_node.sort_info.__isset.slot_exprs_nullability_changed_flags) {
+        _need_convert_to_nullable_flags =
+                tnode.sort_node.sort_info.slot_exprs_nullability_changed_flags;
+    } else {
+        _need_convert_to_nullable_flags.resize(
+                tnode.sort_node.sort_info.__isset.sort_tuple_slot_exprs
+                        ? tnode.sort_node.sort_info.sort_tuple_slot_exprs.size()
+                        : 0,
+                false);
+    }
     return Status::OK();
 }
 
@@ -177,8 +187,17 @@ Status VSortNode::pretreat_block(doris::vectorized::Block& block) {
         }
 
         Block new_block;
+        int i = 0;
+
         for (auto column_id : valid_column_ids) {
-            new_block.insert(block.get_by_position(column_id));
+            if (_need_convert_to_nullable_flags[i]) {
+                auto column_ptr = make_nullable(block.get_by_position(column_id).column);
+                new_block.insert(
+                        {column_ptr, make_nullable(block.get_by_position(column_id).type), ""});
+            } else {
+                new_block.insert(block.get_by_position(column_id));
+            }
+            i++;
         }
         block.swap(new_block);
     }
diff --git a/be/src/vec/exec/vsort_node.h b/be/src/vec/exec/vsort_node.h
index f67326afa6..ba63839dd8 100644
--- a/be/src/vec/exec/vsort_node.h
+++ b/be/src/vec/exec/vsort_node.h
@@ -76,6 +76,10 @@ private:
     std::vector<Block> _sorted_blocks;
     std::priority_queue<SortCursor> _priority_queue;
 
+    // for some reason, _sort_tuple_slot_expr_ctxs is not-null but _lhs_ordering_expr_ctxs is nullable
+    // this flag list would be used to convert column to nullable.
+    std::vector<bool> _need_convert_to_nullable_flags;
+
     // TODO: Not using now, maybe should be delete
     // Keeps track of the number of rows skipped for handling _offset.
     int64_t _num_rows_skipped;
diff --git a/be/src/vec/functions/in.cpp b/be/src/vec/functions/in.cpp
index 216165b826..18ffe6e7b8 100644
--- a/be/src/vec/functions/in.cpp
+++ b/be/src/vec/functions/in.cpp
@@ -67,10 +67,17 @@ public:
         }
         auto* state = new InState();
         context->set_function_state(scope, state);
-        state->hybrid_set.reset(
-                vec_create_set(convert_type_to_primitive(context->get_arg_type(0)->type)));
+        if (context->get_arg_type(0)->type == FunctionContext::Type::TYPE_CHAR ||
+            context->get_arg_type(0)->type == FunctionContext::Type::TYPE_VARCHAR ||
+            context->get_arg_type(0)->type == FunctionContext::Type::TYPE_STRING) {
+            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
+            state->hybrid_set.reset(new StringValueSet());
+        } else {
+            state->hybrid_set.reset(
+                    vec_create_set(convert_type_to_primitive(context->get_arg_type(0)->type)));
+        }
 
-        DCHECK(context->get_num_args() > 1);
+        DCHECK(context->get_num_args() >= 1);
         for (int i = 1; i < context->get_num_args(); ++i) {
             const auto& const_column_ptr = context->get_constant_col(i);
             if (const_column_ptr != nullptr) {
@@ -78,7 +85,7 @@ public:
                 if (const_data.data == nullptr) {
                     state->null_in_set = true;
                 } else {
-                    state->hybrid_set->insert((void *) const_data.data, const_data.size);
+                    state->hybrid_set->insert((void*)const_data.data, const_data.size);
                 }
             } else {
                 state->use_set = false;
@@ -91,7 +98,7 @@ public:
     Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) override {
         auto in_state = reinterpret_cast<InState*>(
-                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
         if (!in_state) {
             return Status::RuntimeError(fmt::format("funciton context for function '{}' must have Set;",
                                 get_name()));
@@ -109,17 +116,74 @@ public:
         auto materialized_column = left_arg.column->convert_to_full_column_if_const();
 
         if (in_state->use_set) {
-            for (size_t i = 0; i < input_rows_count; ++i) {
-                const auto& ref_data = materialized_column->get_data_at(i);
-                if (ref_data.data) {
-                    vec_res[i] = negative ^ in_state->hybrid_set->find((void *) ref_data.data, ref_data.size);
-                    if (in_state->null_in_set) {
+            if (materialized_column->is_nullable()) {
+                auto* null_col_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(
+                        materialized_column);
+                auto& null_bitmap = reinterpret_cast<const vectorized::ColumnUInt8&>(
+                                            null_col_ptr->get_null_map_column())
+                                            .get_data();
+                auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
+                auto search_hash_set = [&](auto* col_ptr) {
+                    for (size_t i = 0; i < input_rows_count; ++i) {
+                        const auto& ref_data = col_ptr->get_data_at(i);
+                        vec_res[i] =
+                                !null_bitmap[i] &&
+                                in_state->hybrid_set->find((void*)ref_data.data, ref_data.size);
+                        if constexpr (negative) {
+                            vec_res[i] = !vec_res[i];
+                        }
+                    }
+                };
+
+                if (nested_col_ptr->is_column_string()) {
+                    const auto* column_string_ptr =
+                            reinterpret_cast<const vectorized::ColumnString*>(nested_col_ptr);
+                    search_hash_set(column_string_ptr);
+                } else {
+                    // todo support other column type
+                    search_hash_set(nested_col_ptr);
+                }
+
+                if (!in_state->null_in_set) {
+                    for (size_t i = 0; i < input_rows_count; ++i) {
+                        vec_null_map_to[i] = null_bitmap[i];
+                    }
+                } else {
+                    for (size_t i = 0; i < input_rows_count; ++i) {
+                        vec_null_map_to[i] = null_bitmap[i] || (negative == vec_res[i]);
+                    }
+                }
+
+            } else { // non-nullable
+
+                auto search_hash_set = [&](auto* col_ptr) {
+                    for (size_t i = 0; i < input_rows_count; ++i) {
+                        const auto& ref_data = col_ptr->get_data_at(i);
+                        vec_res[i] =
+                                in_state->hybrid_set->find((void*)ref_data.data, ref_data.size);
+                        if constexpr (negative) {
+                            vec_res[i] = !vec_res[i];
+                        }
+                    }
+                };
+
+                if (materialized_column->is_column_string()) {
+                    const auto* column_string_ptr =
+                            reinterpret_cast<const vectorized::ColumnString*>(
+                                    materialized_column.get());
+                    search_hash_set(column_string_ptr);
+                } else {
+                    search_hash_set(materialized_column.get());
+                }
+
+                if (in_state->null_in_set) {
+                    for (size_t i = 0; i < input_rows_count; ++i) {
                         vec_null_map_to[i] = negative == vec_res[i];
-                    } else {
-                        vec_null_map_to[i] = false;
                     }
                 } else {
-                    vec_null_map_to[i] = true;
+                    for (size_t i = 0; i < input_rows_count; ++i) {
+                        vec_null_map_to[i] = false;
+                    }
                 }
             }
         } else {
@@ -144,9 +208,9 @@ public:
                     if (set_data.data == nullptr)
                         null_in_set = true;
                     else
-                        hybrid_set->insert((void *)(set_data.data), set_data.size);
+                        hybrid_set->insert((void*)(set_data.data), set_data.size);
                 }
-                vec_res[i] = negative ^ hybrid_set->find((void *) ref_data.data, ref_data.size);
+                vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size);
                 if (null_in_set) {
                     vec_null_map_to[i] = negative == vec_res[i];
                 } else {
@@ -156,7 +220,8 @@ public:
         }
 
         if (block.get_by_position(result).type->is_nullable()) {
-            block.replace_by_position(result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
+            block.replace_by_position(
+                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
         } else {
             block.replace_by_position(result, std::move(res));
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index b7ef73a2d6..7de001a808 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -1395,7 +1395,19 @@ public class SelectStmt extends QueryStmt {
             }
             List<Expr> oriGroupingExprs = groupByClause.getOriGroupingExprs();
             if (oriGroupingExprs != null) {
+                // we must make sure the expr is analyzed before rewrite
+                try {
+                    for (Expr expr : oriGroupingExprs) {
+                        expr.analyze(analyzer);
+                    }
+                } catch (AnalysisException ex) {
+                    //ignore any exception
+                }
                 rewriter.rewriteList(oriGroupingExprs, analyzer);
+                // after rewrite, need reset the analyze status for later re-analyze
+                for (Expr expr : oriGroupingExprs) {
+                    expr.reset();
+                }
             }
         }
         if (orderByElements != null) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
index c6d5bf2332..60be79de29 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
@@ -66,6 +66,8 @@ public class SortNode extends PlanNode {
 
     private boolean isUnusedExprRemoved = false;
 
+    private ArrayList<Boolean> nullabilityChangedFlags = Lists.newArrayList();
+
     public void setIsAnalyticSort(boolean v) {
         isAnalyticSort = v;
     }
@@ -131,6 +133,7 @@ public class SortNode extends PlanNode {
                 for (int i = slotDescriptorList.size() - 1; i >= 0; i--) {
                     if (!slotDescriptorList.get(i).isMaterialized()) {
                         resolvedTupleExprs.remove(i);
+                        nullabilityChangedFlags.remove(i);
                     }
                 }
             }
@@ -201,6 +204,9 @@ public class SortNode extends PlanNode {
         removeUnusedExprs();
         if (resolvedTupleExprs != null) {
             sortInfo.setSortTupleSlotExprs(Expr.treesToThrift(resolvedTupleExprs));
+            // FIXME this is a bottom line solution for wrong nullability of resolvedTupleExprs
+            // remove the following line after nereids online
+            sortInfo.setSlotExprsNullabilityChangedFlags(nullabilityChangedFlags);
         }
         
         TSortNode sortNode = new TSortNode(sortInfo, useTopN);
@@ -267,11 +273,16 @@ public class SortNode extends PlanNode {
         for (int i = 0; i < slotExprs.size(); ++i) {
             resolvedTupleExprs.add(slotExprs.get(i));
             outputSmap.put(slotExprs.get(i), new SlotRef(sortTupleSlots.get(i)));
+            nullabilityChangedFlags.add(slotExprs.get(i).isNullable());
         }
 
         ExprSubstitutionMap childSmap = getCombinedChildSmap();
         resolvedTupleExprs = Expr.substituteList(resolvedTupleExprs, childSmap, analyzer, false);
 
+        for (int i = 0; i < resolvedTupleExprs.size(); ++i) {
+            nullabilityChangedFlags.set(i, nullabilityChangedFlags.get(i) ^ resolvedTupleExprs.get(i).isNullable());
+        }
+
         // Remap the ordering exprs to the tuple materialized by this sort node. The mapping
         // is a composition of the childSmap and the outputSmap_ because the child node may
         // have also remapped its input (e.g., as in a a series of (sort->analytic)* nodes).
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index 9f23f96cef..4458416ee0 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -499,6 +499,8 @@ struct TSortInfo {
   // Expressions evaluated over the input row that materialize the tuple to be sorted.
   // Contains one expr per slot in the materialized tuple.
   4: optional list<Exprs.TExpr> sort_tuple_slot_exprs
+  // Indicates the nullable info of sort_tuple_slot_exprs is changed after substitute by child's smap
+  5: optional list<bool> slot_exprs_nullability_changed_flags
 }
 
 struct TSortNode {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org