You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/30 06:44:52 UTC
[doris] branch branch-1.2-lts updated: [cherry-pick](branch) cherry pick #15841 #15987 #15945 (#16151)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new ca41c7686e [cherry-pick](branch) cherry pick #15841 #15987 #15945 (#16151)
ca41c7686e is described below
commit ca41c7686ea30f119df122eefc924f74219dd7ed
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon Jan 30 14:44:45 2023 +0800
[cherry-pick](branch) cherry pick #15841 #15987 #15945 (#16151)
---
be/src/vec/common/string_ref.h | 1 +
be/src/vec/exec/join/vhash_join_node.cpp | 9 ++-
be/src/vec/exprs/table_function/vexplode_split.cpp | 61 +++++++++++++++----
be/src/vec/exprs/table_function/vexplode_split.h | 7 ++-
be/test/vec/function/table_function_test.cpp | 27 ++++++---
.../test_null_aware_left_anti_join.out | 1 -
regression-test/data/query_p0/join/test_join.out | 70 ++++++++++++++++++++++
.../suites/query_p0/join/test_join.groovy | 2 +-
8 files changed, 153 insertions(+), 25 deletions(-)
diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h
index 625326b747..c146b48187 100644
--- a/be/src/vec/common/string_ref.h
+++ b/be/src/vec/common/string_ref.h
@@ -151,6 +151,7 @@ struct StringRef {
std::string to_prefix(size_t length) const { return std::string(data, std::min(length, size)); }
explicit operator std::string() const { return to_string(); }
+ operator std::string_view() const { return std::string_view {data, size}; }
StringRef substring(int start_pos, int new_len) const {
return StringRef(data + start_pos, (new_len < 0) ? (size - start_pos) : new_len);
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 13337a2e83..8236f61fb6 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -775,7 +775,14 @@ Status HashJoinNode::_materialize_build_side(RuntimeState* state) {
}
}
- _process_hashtable_ctx_variants_init(state);
+ if (eos || !_should_build_hash_table) {
+ _process_hashtable_ctx_variants_init(state);
+ }
+ // Since the comparison of null values is meaningless, null aware left anti join should not output null
+ // when the build side is not empty.
+ if (eos && !_build_blocks->empty() && _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
+ _probe_ignore_null = true;
+ }
return Status::OK();
}
diff --git a/be/src/vec/exprs/table_function/vexplode_split.cpp b/be/src/vec/exprs/table_function/vexplode_split.cpp
index fbd37d4d3f..3401474715 100644
--- a/be/src/vec/exprs/table_function/vexplode_split.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_split.cpp
@@ -45,8 +45,25 @@ Status VExplodeSplitTableFunction::process_init(vectorized::Block* block) {
RETURN_IF_ERROR(_vexpr_context->root()->children()[1]->execute(_vexpr_context, block,
&delimiter_column_idx));
- _text_column = block->get_by_position(text_column_idx).column;
- _delimiter_column = block->get_by_position(delimiter_column_idx).column;
+ // handle the text column
+ _text_column =
+ block->get_by_position(text_column_idx).column->convert_to_full_column_if_const();
+ if (_text_column->is_nullable()) {
+ const auto& column_null = assert_cast<const ColumnNullable&>(*_text_column);
+ _test_null_map = column_null.get_null_map_data().data();
+ _real_text_column = &assert_cast<const ColumnString&>(column_null.get_nested_column());
+ } else {
+ _real_text_column = &assert_cast<const ColumnString&>(*_text_column);
+ }
+
+ // handle the delimiter column
+ auto& delimiter_const_column = block->get_by_position(delimiter_column_idx).column;
+ if (is_column_const(*delimiter_const_column)) {
+ _delimiter = delimiter_const_column->get_data_at(0);
+ } else {
+ return Status::NotSupported(
+ "explode_split(test, delimiter) delimiter column must be const");
+ }
return Status::OK();
}
@@ -55,17 +72,37 @@ Status VExplodeSplitTableFunction::process_row(size_t row_idx) {
_is_current_empty = false;
_eos = false;
- StringRef text = _text_column->get_data_at(row_idx);
- StringRef delimiter = _delimiter_column->get_data_at(row_idx);
-
- if (text.data == nullptr) {
+ if ((_test_null_map and _test_null_map[row_idx]) || _delimiter.data == nullptr) {
_is_current_empty = true;
_cur_size = 0;
_cur_offset = 0;
} else {
- //TODO: implement non-copy split string reference
- _backup = strings::Split(StringPiece((char*)text.data, text.size),
- StringPiece((char*)delimiter.data, delimiter.size));
+ // TODO: replace this lambda with a better shared string_view/StringRef split function
+ auto split = [](std::string_view strv, std::string_view delims = " ") {
+ std::vector<std::string_view> output;
+ auto first = strv.begin();
+ auto last = strv.end();
+
+ do {
+ const auto second =
+ std::search(first, last, std::cbegin(delims), std::cend(delims));
+ if (first != second) {
+ output.emplace_back(strv.substr(std::distance(strv.begin(), first),
+ std::distance(first, second)));
+ first = std::next(second);
+ } else {
+ output.emplace_back("", 0);
+ first = std::next(second, delims.size());
+ }
+
+ if (second == last) {
+ break;
+ }
+ } while (first != last);
+
+ return output;
+ };
+ _backup = split(_real_text_column->get_data_at(row_idx), _delimiter);
_cur_size = _backup.size();
_cur_offset = 0;
@@ -76,7 +113,9 @@ Status VExplodeSplitTableFunction::process_row(size_t row_idx) {
Status VExplodeSplitTableFunction::process_close() {
_text_column = nullptr;
- _delimiter_column = nullptr;
+ _real_text_column = nullptr;
+ _test_null_map = nullptr;
+ _delimiter = {};
return Status::OK();
}
@@ -84,7 +123,7 @@ Status VExplodeSplitTableFunction::get_value(void** output) {
if (_is_current_empty) {
*output = nullptr;
} else {
- *output = _backup[_cur_offset].data();
+ *output = const_cast<char*>(_backup[_cur_offset].data());
}
return Status::OK();
}
diff --git a/be/src/vec/exprs/table_function/vexplode_split.h b/be/src/vec/exprs/table_function/vexplode_split.h
index c7b780764e..f4ddf43df6 100644
--- a/be/src/vec/exprs/table_function/vexplode_split.h
+++ b/be/src/vec/exprs/table_function/vexplode_split.h
@@ -37,10 +37,13 @@ public:
virtual Status get_value_length(int64_t* length) override;
private:
- using ExplodeSplitTableFunction::process;
+ std::vector<std::string_view> _backup;
ColumnPtr _text_column;
- ColumnPtr _delimiter_column;
+ const uint8_t* _test_null_map = nullptr;
+ const ColumnString* _real_text_column = nullptr;
+
+ StringRef _delimiter = {};
};
} // namespace doris::vectorized
diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp
index b83c019e32..3b86a219cb 100644
--- a/be/test/vec/function/table_function_test.cpp
+++ b/be/test/vec/function/table_function_test.cpp
@@ -179,17 +179,26 @@ TEST_F(TableFunctionTest, vexplode_split) {
// Case 2: explode_split("a,b,c", ",") --> ["a", "b", "c"]
// Case 3: explode_split("a,b,c", "a,")) --> ["", "b,c"]
// Case 4: explode_split("", ",")) --> [""]
- InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
- InputDataSet input_set = {{Null(), Null()},
- {std::string("a,b,c"), std::string(",")},
- {std::string("a,b,c"), std::string("a,")},
- {std::string(""), std::string(",")}};
+ InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}};
+ InputDataSet input_sets = {{Null(), Null()},
+ {std::string("a,b,c"), std::string(",")},
+ {std::string("a,b,c"), std::string("a,")},
+ {std::string(""), std::string(",")}};
InputTypeSet output_types = {TypeIndex::String};
- InputDataSet output_set = {{std::string("a")}, {std::string("b")}, {std::string("c")},
- {std::string("")}, {std::string("b,c")}, {std::string("")}};
-
- check_vec_table_function(&tfn, input_types, input_set, output_types, output_set);
+ InputDataSet output_sets = {{},
+ {std::string("a"), std::string("b"), std::string("c")},
+ {std::string(""), std::string("b,c")},
+ {std::string("")}};
+
+ for (int i = 0; i < input_sets.size(); ++i) {
+ InputDataSet input_set {input_sets[i]};
+ InputDataSet output_set {};
+ for (const auto& data : output_sets[i]) {
+ output_set.emplace_back(std::vector<AnyType> {data});
+ }
+ check_vec_table_function(&tfn, input_types, input_set, output_types, output_set);
+ }
}
}
diff --git a/regression-test/data/correctness_p0/test_null_aware_left_anti_join.out b/regression-test/data/correctness_p0/test_null_aware_left_anti_join.out
index 445f07fa65..d33e4e2947 100644
--- a/regression-test/data/correctness_p0/test_null_aware_left_anti_join.out
+++ b/regression-test/data/correctness_p0/test_null_aware_left_anti_join.out
@@ -3,7 +3,6 @@
2
-- !select --
-\N
2
-- !select --
diff --git a/regression-test/data/query_p0/join/test_join.out b/regression-test/data/query_p0/join/test_join.out
index 94d5e31dae..987bbf1ecb 100644
--- a/regression-test/data/query_p0/join/test_join.out
+++ b/regression-test/data/query_p0/join/test_join.out
@@ -1234,6 +1234,74 @@ false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk
14
15
+-- !left_anti_join_null_1 --
+\N
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+
+-- !left_anti_join_null_2 --
+\N
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+
+-- !left_anti_join_null_3 --
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+
+-- !left_anti_join_null_4 --
+\N
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+
-- !right_anti_join_with_other_pred --
1
2
@@ -1693,6 +1761,8 @@ false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.
false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727
false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 78945.0 3654.0 string12345 0
+-- !join_bug4 --
+
-- !join_basic1 --
false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727 false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727
false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727
diff --git a/regression-test/suites/query_p0/join/test_join.groovy b/regression-test/suites/query_p0/join/test_join.groovy
index 5137156f6a..56c47e1080 100644
--- a/regression-test/suites/query_p0/join/test_join.groovy
+++ b/regression-test/suites/query_p0/join/test_join.groovy
@@ -920,7 +920,7 @@ suite("test_join", "query,p0") {
// https://github.com/apache/doris/issues/4210
qt_join_bug3"""select * from baseall t1 where k1 = (select min(k1) from test t2 where t2.k1 = t1.k1 and t2.k2=t1.k2)
order by k1"""
-
+ qt_join_bug4"""select b.k1 from baseall b where b.k1 not in( select k1 from baseall where k1 is not null )"""
// basic join
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org