You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/10/21 02:15:27 UTC
[doris] branch branch-1.1-lts updated: [fix](cherry-pick)pick some code from master to fix bugs (#13517)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
new 401fe8bc9a [fix](cherry-pick)pick some code from master to fix bugs (#13517)
401fe8bc9a is described below
commit 401fe8bc9aa0a25360ba34fd7d7d86f32a98de8c
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Fri Oct 21 10:15:21 2022 +0800
[fix](cherry-pick)pick some code from master to fix bugs (#13517)
---
be/src/vec/exec/vsort_node.cpp | 21 ++++-
be/src/vec/exec/vsort_node.h | 4 +
be/src/vec/functions/in.cpp | 97 ++++++++++++++++++----
.../java/org/apache/doris/analysis/SelectStmt.java | 12 +++
.../java/org/apache/doris/planner/SortNode.java | 11 +++
gensrc/thrift/PlanNodes.thrift | 2 +
6 files changed, 130 insertions(+), 17 deletions(-)
diff --git a/be/src/vec/exec/vsort_node.cpp b/be/src/vec/exec/vsort_node.cpp
index 99c8090fde..c494d8231d 100644
--- a/be/src/vec/exec/vsort_node.cpp
+++ b/be/src/vec/exec/vsort_node.cpp
@@ -36,6 +36,16 @@ Status VSortNode::init(const TPlanNode& tnode, RuntimeState* state) {
RETURN_IF_ERROR(_vsort_exec_exprs.init(tnode.sort_node.sort_info, _pool));
_is_asc_order = tnode.sort_node.sort_info.is_asc_order;
_nulls_first = tnode.sort_node.sort_info.nulls_first;
+ if (tnode.sort_node.sort_info.__isset.slot_exprs_nullability_changed_flags) {
+ _need_convert_to_nullable_flags =
+ tnode.sort_node.sort_info.slot_exprs_nullability_changed_flags;
+ } else {
+ _need_convert_to_nullable_flags.resize(
+ tnode.sort_node.sort_info.__isset.sort_tuple_slot_exprs
+ ? tnode.sort_node.sort_info.sort_tuple_slot_exprs.size()
+ : 0,
+ false);
+ }
return Status::OK();
}
@@ -177,8 +187,17 @@ Status VSortNode::pretreat_block(doris::vectorized::Block& block) {
}
Block new_block;
+ int i = 0;
+
for (auto column_id : valid_column_ids) {
- new_block.insert(block.get_by_position(column_id));
+ if (_need_convert_to_nullable_flags[i]) {
+ auto column_ptr = make_nullable(block.get_by_position(column_id).column);
+ new_block.insert(
+ {column_ptr, make_nullable(block.get_by_position(column_id).type), ""});
+ } else {
+ new_block.insert(block.get_by_position(column_id));
+ }
+ i++;
}
block.swap(new_block);
}
diff --git a/be/src/vec/exec/vsort_node.h b/be/src/vec/exec/vsort_node.h
index f67326afa6..ba63839dd8 100644
--- a/be/src/vec/exec/vsort_node.h
+++ b/be/src/vec/exec/vsort_node.h
@@ -76,6 +76,10 @@ private:
std::vector<Block> _sorted_blocks;
std::priority_queue<SortCursor> _priority_queue;
+ // for some reason, _sort_tuple_slot_expr_ctxs is not-null but _lhs_ordering_expr_ctxs is nullable
+ // this flag list would be used to convert column to nullable.
+ std::vector<bool> _need_convert_to_nullable_flags;
+
// TODO: Not using now, maybe should be delete
// Keeps track of the number of rows skipped for handling _offset.
int64_t _num_rows_skipped;
diff --git a/be/src/vec/functions/in.cpp b/be/src/vec/functions/in.cpp
index 216165b826..18ffe6e7b8 100644
--- a/be/src/vec/functions/in.cpp
+++ b/be/src/vec/functions/in.cpp
@@ -67,10 +67,17 @@ public:
}
auto* state = new InState();
context->set_function_state(scope, state);
- state->hybrid_set.reset(
- vec_create_set(convert_type_to_primitive(context->get_arg_type(0)->type)));
+ if (context->get_arg_type(0)->type == FunctionContext::Type::TYPE_CHAR ||
+ context->get_arg_type(0)->type == FunctionContext::Type::TYPE_VARCHAR ||
+ context->get_arg_type(0)->type == FunctionContext::Type::TYPE_STRING) {
+ // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
+ state->hybrid_set.reset(new StringValueSet());
+ } else {
+ state->hybrid_set.reset(
+ vec_create_set(convert_type_to_primitive(context->get_arg_type(0)->type)));
+ }
- DCHECK(context->get_num_args() > 1);
+ DCHECK(context->get_num_args() >= 1);
for (int i = 1; i < context->get_num_args(); ++i) {
const auto& const_column_ptr = context->get_constant_col(i);
if (const_column_ptr != nullptr) {
@@ -78,7 +85,7 @@ public:
if (const_data.data == nullptr) {
state->null_in_set = true;
} else {
- state->hybrid_set->insert((void *) const_data.data, const_data.size);
+ state->hybrid_set->insert((void*)const_data.data, const_data.size);
}
} else {
state->use_set = false;
@@ -91,7 +98,7 @@ public:
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
auto in_state = reinterpret_cast<InState*>(
- context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+ context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
if (!in_state) {
return Status::RuntimeError(fmt::format("funciton context for function '{}' must have Set;",
get_name()));
@@ -109,17 +116,74 @@ public:
auto materialized_column = left_arg.column->convert_to_full_column_if_const();
if (in_state->use_set) {
- for (size_t i = 0; i < input_rows_count; ++i) {
- const auto& ref_data = materialized_column->get_data_at(i);
- if (ref_data.data) {
- vec_res[i] = negative ^ in_state->hybrid_set->find((void *) ref_data.data, ref_data.size);
- if (in_state->null_in_set) {
+ if (materialized_column->is_nullable()) {
+ auto* null_col_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(
+ materialized_column);
+ auto& null_bitmap = reinterpret_cast<const vectorized::ColumnUInt8&>(
+ null_col_ptr->get_null_map_column())
+ .get_data();
+ auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
+ auto search_hash_set = [&](auto* col_ptr) {
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ const auto& ref_data = col_ptr->get_data_at(i);
+ vec_res[i] =
+ !null_bitmap[i] &&
+ in_state->hybrid_set->find((void*)ref_data.data, ref_data.size);
+ if constexpr (negative) {
+ vec_res[i] = !vec_res[i];
+ }
+ }
+ };
+
+ if (nested_col_ptr->is_column_string()) {
+ const auto* column_string_ptr =
+ reinterpret_cast<const vectorized::ColumnString*>(nested_col_ptr);
+ search_hash_set(column_string_ptr);
+ } else {
+ // todo support other column type
+ search_hash_set(nested_col_ptr);
+ }
+
+ if (!in_state->null_in_set) {
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ vec_null_map_to[i] = null_bitmap[i];
+ }
+ } else {
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ vec_null_map_to[i] = null_bitmap[i] || (negative == vec_res[i]);
+ }
+ }
+
+ } else { // non-nullable
+
+ auto search_hash_set = [&](auto* col_ptr) {
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ const auto& ref_data = col_ptr->get_data_at(i);
+ vec_res[i] =
+ in_state->hybrid_set->find((void*)ref_data.data, ref_data.size);
+ if constexpr (negative) {
+ vec_res[i] = !vec_res[i];
+ }
+ }
+ };
+
+ if (materialized_column->is_column_string()) {
+ const auto* column_string_ptr =
+ reinterpret_cast<const vectorized::ColumnString*>(
+ materialized_column.get());
+ search_hash_set(column_string_ptr);
+ } else {
+ search_hash_set(materialized_column.get());
+ }
+
+ if (in_state->null_in_set) {
+ for (size_t i = 0; i < input_rows_count; ++i) {
vec_null_map_to[i] = negative == vec_res[i];
- } else {
- vec_null_map_to[i] = false;
}
} else {
- vec_null_map_to[i] = true;
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ vec_null_map_to[i] = false;
+ }
}
}
} else {
@@ -144,9 +208,9 @@ public:
if (set_data.data == nullptr)
null_in_set = true;
else
- hybrid_set->insert((void *)(set_data.data), set_data.size);
+ hybrid_set->insert((void*)(set_data.data), set_data.size);
}
- vec_res[i] = negative ^ hybrid_set->find((void *) ref_data.data, ref_data.size);
+ vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size);
if (null_in_set) {
vec_null_map_to[i] = negative == vec_res[i];
} else {
@@ -156,7 +220,8 @@ public:
}
if (block.get_by_position(result).type->is_nullable()) {
- block.replace_by_position(result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
+ block.replace_by_position(
+ result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
} else {
block.replace_by_position(result, std::move(res));
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index b7ef73a2d6..7de001a808 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -1395,7 +1395,19 @@ public class SelectStmt extends QueryStmt {
}
List<Expr> oriGroupingExprs = groupByClause.getOriGroupingExprs();
if (oriGroupingExprs != null) {
+ // we must make sure the expr is analyzed before rewrite
+ try {
+ for (Expr expr : oriGroupingExprs) {
+ expr.analyze(analyzer);
+ }
+ } catch (AnalysisException ex) {
+ //ignore any exception
+ }
rewriter.rewriteList(oriGroupingExprs, analyzer);
+ // after rewrite, need reset the analyze status for later re-analyze
+ for (Expr expr : oriGroupingExprs) {
+ expr.reset();
+ }
}
}
if (orderByElements != null) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
index c6d5bf2332..60be79de29 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
@@ -66,6 +66,8 @@ public class SortNode extends PlanNode {
private boolean isUnusedExprRemoved = false;
+ private ArrayList<Boolean> nullabilityChangedFlags = Lists.newArrayList();
+
public void setIsAnalyticSort(boolean v) {
isAnalyticSort = v;
}
@@ -131,6 +133,7 @@ public class SortNode extends PlanNode {
for (int i = slotDescriptorList.size() - 1; i >= 0; i--) {
if (!slotDescriptorList.get(i).isMaterialized()) {
resolvedTupleExprs.remove(i);
+ nullabilityChangedFlags.remove(i);
}
}
}
@@ -201,6 +204,9 @@ public class SortNode extends PlanNode {
removeUnusedExprs();
if (resolvedTupleExprs != null) {
sortInfo.setSortTupleSlotExprs(Expr.treesToThrift(resolvedTupleExprs));
+ // FIXME this is a bottom line solution for wrong nullability of resolvedTupleExprs
+ // remove the following line after nereids online
+ sortInfo.setSlotExprsNullabilityChangedFlags(nullabilityChangedFlags);
}
TSortNode sortNode = new TSortNode(sortInfo, useTopN);
@@ -267,11 +273,16 @@ public class SortNode extends PlanNode {
for (int i = 0; i < slotExprs.size(); ++i) {
resolvedTupleExprs.add(slotExprs.get(i));
outputSmap.put(slotExprs.get(i), new SlotRef(sortTupleSlots.get(i)));
+ nullabilityChangedFlags.add(slotExprs.get(i).isNullable());
}
ExprSubstitutionMap childSmap = getCombinedChildSmap();
resolvedTupleExprs = Expr.substituteList(resolvedTupleExprs, childSmap, analyzer, false);
+ for (int i = 0; i < resolvedTupleExprs.size(); ++i) {
+ nullabilityChangedFlags.set(i, nullabilityChangedFlags.get(i) ^ resolvedTupleExprs.get(i).isNullable());
+ }
+
// Remap the ordering exprs to the tuple materialized by this sort node. The mapping
// is a composition of the childSmap and the outputSmap_ because the child node may
// have also remapped its input (e.g., as in a a series of (sort->analytic)* nodes).
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index 9f23f96cef..4458416ee0 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -499,6 +499,8 @@ struct TSortInfo {
// Expressions evaluated over the input row that materialize the tuple to be sorted.
// Contains one expr per slot in the materialized tuple.
4: optional list<Exprs.TExpr> sort_tuple_slot_exprs
+ // Indicates the nullable info of sort_tuple_slot_exprs is changed after substitute by child's smap
+ 5: optional list<bool> slot_exprs_nullability_changed_flags
}
struct TSortNode {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org