You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/30 06:44:52 UTC

[doris] branch branch-1.2-lts updated: [cherry-pick](branch) cherry pick #15841 #15987 #15945 (#16151)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new ca41c7686e [cherry-pick](branch) cherry pick #15841 #15987 #15945 (#16151)
ca41c7686e is described below

commit ca41c7686ea30f119df122eefc924f74219dd7ed
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon Jan 30 14:44:45 2023 +0800

    [cherry-pick](branch) cherry pick #15841 #15987 #15945 (#16151)
---
 be/src/vec/common/string_ref.h                     |  1 +
 be/src/vec/exec/join/vhash_join_node.cpp           |  9 ++-
 be/src/vec/exprs/table_function/vexplode_split.cpp | 61 +++++++++++++++----
 be/src/vec/exprs/table_function/vexplode_split.h   |  7 ++-
 be/test/vec/function/table_function_test.cpp       | 27 ++++++---
 .../test_null_aware_left_anti_join.out             |  1 -
 regression-test/data/query_p0/join/test_join.out   | 70 ++++++++++++++++++++++
 .../suites/query_p0/join/test_join.groovy          |  2 +-
 8 files changed, 153 insertions(+), 25 deletions(-)

diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h
index 625326b747..c146b48187 100644
--- a/be/src/vec/common/string_ref.h
+++ b/be/src/vec/common/string_ref.h
@@ -151,6 +151,7 @@ struct StringRef {
     std::string to_prefix(size_t length) const { return std::string(data, std::min(length, size)); }
 
     explicit operator std::string() const { return to_string(); }
+    operator std::string_view() const { return std::string_view {data, size}; }
 
     StringRef substring(int start_pos, int new_len) const {
         return StringRef(data + start_pos, (new_len < 0) ? (size - start_pos) : new_len);
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 13337a2e83..8236f61fb6 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -775,7 +775,14 @@ Status HashJoinNode::_materialize_build_side(RuntimeState* state) {
         }
     }
 
-    _process_hashtable_ctx_variants_init(state);
+    if (eos || !_should_build_hash_table) {
+        _process_hashtable_ctx_variants_init(state);
+    }
+    // Since the comparison of null values is meaningless, null aware left anti join should not output null
+    // when the build side is not empty.
+    if (eos && !_build_blocks->empty() && _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
+        _probe_ignore_null = true;
+    }
     return Status::OK();
 }
 
diff --git a/be/src/vec/exprs/table_function/vexplode_split.cpp b/be/src/vec/exprs/table_function/vexplode_split.cpp
index fbd37d4d3f..3401474715 100644
--- a/be/src/vec/exprs/table_function/vexplode_split.cpp
+++ b/be/src/vec/exprs/table_function/vexplode_split.cpp
@@ -45,8 +45,25 @@ Status VExplodeSplitTableFunction::process_init(vectorized::Block* block) {
     RETURN_IF_ERROR(_vexpr_context->root()->children()[1]->execute(_vexpr_context, block,
                                                                    &delimiter_column_idx));
 
-    _text_column = block->get_by_position(text_column_idx).column;
-    _delimiter_column = block->get_by_position(delimiter_column_idx).column;
+    // dispose text column
+    _text_column =
+            block->get_by_position(text_column_idx).column->convert_to_full_column_if_const();
+    if (_text_column->is_nullable()) {
+        const auto& column_null = assert_cast<const ColumnNullable&>(*_text_column);
+        _test_null_map = column_null.get_null_map_data().data();
+        _real_text_column = &assert_cast<const ColumnString&>(column_null.get_nested_column());
+    } else {
+        _real_text_column = &assert_cast<const ColumnString&>(*_text_column);
+    }
+
+    // dispose delimiter column
+    auto& delimiter_const_column = block->get_by_position(delimiter_column_idx).column;
+    if (is_column_const(*delimiter_const_column)) {
+        _delimiter = delimiter_const_column->get_data_at(0);
+    } else {
+        return Status::NotSupported(
+                "explode_split(test, delimiter) delimiter column must be const");
+    }
 
     return Status::OK();
 }
@@ -55,17 +72,37 @@ Status VExplodeSplitTableFunction::process_row(size_t row_idx) {
     _is_current_empty = false;
     _eos = false;
 
-    StringRef text = _text_column->get_data_at(row_idx);
-    StringRef delimiter = _delimiter_column->get_data_at(row_idx);
-
-    if (text.data == nullptr) {
+    if ((_test_null_map and _test_null_map[row_idx]) || _delimiter.data == nullptr) {
         _is_current_empty = true;
         _cur_size = 0;
         _cur_offset = 0;
     } else {
-        //TODO: implement non-copy split string reference
-        _backup = strings::Split(StringPiece((char*)text.data, text.size),
-                                 StringPiece((char*)delimiter.data, delimiter.size));
+        // TODO: replace this lambda with a better string_view/StringRef split function
+        auto split = [](std::string_view strv, std::string_view delims = " ") {
+            std::vector<std::string_view> output;
+            auto first = strv.begin();
+            auto last = strv.end();
+
+            do {
+                const auto second =
+                        std::search(first, last, std::cbegin(delims), std::cend(delims));
+                if (first != second) {
+                    output.emplace_back(strv.substr(std::distance(strv.begin(), first),
+                                                    std::distance(first, second)));
+                    first = std::next(second);
+                } else {
+                    output.emplace_back("", 0);
+                    first = std::next(second, delims.size());
+                }
+
+                if (second == last) {
+                    break;
+                }
+            } while (first != last);
+
+            return output;
+        };
+        _backup = split(_real_text_column->get_data_at(row_idx), _delimiter);
 
         _cur_size = _backup.size();
         _cur_offset = 0;
@@ -76,7 +113,9 @@ Status VExplodeSplitTableFunction::process_row(size_t row_idx) {
 
 Status VExplodeSplitTableFunction::process_close() {
     _text_column = nullptr;
-    _delimiter_column = nullptr;
+    _real_text_column = nullptr;
+    _test_null_map = nullptr;
+    _delimiter = {};
     return Status::OK();
 }
 
@@ -84,7 +123,7 @@ Status VExplodeSplitTableFunction::get_value(void** output) {
     if (_is_current_empty) {
         *output = nullptr;
     } else {
-        *output = _backup[_cur_offset].data();
+        *output = const_cast<char*>(_backup[_cur_offset].data());
     }
     return Status::OK();
 }
diff --git a/be/src/vec/exprs/table_function/vexplode_split.h b/be/src/vec/exprs/table_function/vexplode_split.h
index c7b780764e..f4ddf43df6 100644
--- a/be/src/vec/exprs/table_function/vexplode_split.h
+++ b/be/src/vec/exprs/table_function/vexplode_split.h
@@ -37,10 +37,13 @@ public:
     virtual Status get_value_length(int64_t* length) override;
 
 private:
-    using ExplodeSplitTableFunction::process;
+    std::vector<std::string_view> _backup;
 
     ColumnPtr _text_column;
-    ColumnPtr _delimiter_column;
+    const uint8_t* _test_null_map = nullptr;
+    const ColumnString* _real_text_column = nullptr;
+
+    StringRef _delimiter = {};
 };
 
 } // namespace doris::vectorized
diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp
index b83c019e32..3b86a219cb 100644
--- a/be/test/vec/function/table_function_test.cpp
+++ b/be/test/vec/function/table_function_test.cpp
@@ -179,17 +179,26 @@ TEST_F(TableFunctionTest, vexplode_split) {
         // Case 2: explode_split("a,b,c", ",") --> ["a", "b", "c"]
         // Case 3: explode_split("a,b,c", "a,")) --> ["", "b,c"]
         // Case 4: explode_split("", ",")) --> [""]
-        InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
-        InputDataSet input_set = {{Null(), Null()},
-                                  {std::string("a,b,c"), std::string(",")},
-                                  {std::string("a,b,c"), std::string("a,")},
-                                  {std::string(""), std::string(",")}};
+        InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}};
+        InputDataSet input_sets = {{Null(), Null()},
+                                   {std::string("a,b,c"), std::string(",")},
+                                   {std::string("a,b,c"), std::string("a,")},
+                                   {std::string(""), std::string(",")}};
 
         InputTypeSet output_types = {TypeIndex::String};
-        InputDataSet output_set = {{std::string("a")}, {std::string("b")},   {std::string("c")},
-                                   {std::string("")},  {std::string("b,c")}, {std::string("")}};
-
-        check_vec_table_function(&tfn, input_types, input_set, output_types, output_set);
+        InputDataSet output_sets = {{},
+                                    {std::string("a"), std::string("b"), std::string("c")},
+                                    {std::string(""), std::string("b,c")},
+                                    {std::string("")}};
+
+        for (int i = 0; i < input_sets.size(); ++i) {
+            InputDataSet input_set {input_sets[i]};
+            InputDataSet output_set {};
+            for (const auto& data : output_sets[i]) {
+                output_set.emplace_back(std::vector<AnyType> {data});
+            }
+            check_vec_table_function(&tfn, input_types, input_set, output_types, output_set);
+        }
     }
 }
 
diff --git a/regression-test/data/correctness_p0/test_null_aware_left_anti_join.out b/regression-test/data/correctness_p0/test_null_aware_left_anti_join.out
index 445f07fa65..d33e4e2947 100644
--- a/regression-test/data/correctness_p0/test_null_aware_left_anti_join.out
+++ b/regression-test/data/correctness_p0/test_null_aware_left_anti_join.out
@@ -3,7 +3,6 @@
 2
 
 -- !select --
-\N
 2
 
 -- !select --
diff --git a/regression-test/data/query_p0/join/test_join.out b/regression-test/data/query_p0/join/test_join.out
index 94d5e31dae..987bbf1ecb 100644
--- a/regression-test/data/query_p0/join/test_join.out
+++ b/regression-test/data/query_p0/join/test_join.out
@@ -1234,6 +1234,74 @@ false	3	1989	1002	11011905	24453.325	false	2012-03-14	2000-01-01T00:00	yunlj8@nk
 14
 15
 
+-- !left_anti_join_null_1 --
+\N
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+
+-- !left_anti_join_null_2 --
+\N
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+
+-- !left_anti_join_null_3 --
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+
+-- !left_anti_join_null_4 --
+\N
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+
 -- !right_anti_join_with_other_pred --
 1
 2
@@ -1693,6 +1761,8 @@ false	1	1989	1001	11011902	123.123	true	1989-03-21	1989-03-21T13:00	wangjuoo4	0.
 false	2	1986	1001	11011903	1243.500	false	1901-12-31	1989-03-21T13:00	wangynnsf	20.268	789.25	string12345	-170141183460469231731687303715884105727
 false	3	1989	1002	11011905	24453.325	false	2012-03-14	2000-01-01T00:00	yunlj8@nk	78945.0	3654.0	string12345	0
 
+-- !join_bug4 --
+
 -- !join_basic1 --
 false	1	1989	1001	11011902	123.123	true	1989-03-21	1989-03-21T13:00	wangjuoo4	0.1	6.333	string12345	170141183460469231731687303715884105727	false	1	1989	1001	11011902	123.123	true	1989-03-21	1989-03-21T13:00	wangjuoo4	0.1	6.333	string12345	170141183460469231731687303715884105727
 false	2	1986	1001	11011903	1243.500	false	1901-12-31	1989-03-21T13:00	wangynnsf	20.268	789.25	string12345	-170141183460469231731687303715884105727	false	2	1986	1001	11011903	1243.500	false	1901-12-31	1989-03-21T13:00	wangynnsf	20.268	789.25	string12345	-170141183460469231731687303715884105727
diff --git a/regression-test/suites/query_p0/join/test_join.groovy b/regression-test/suites/query_p0/join/test_join.groovy
index 5137156f6a..56c47e1080 100644
--- a/regression-test/suites/query_p0/join/test_join.groovy
+++ b/regression-test/suites/query_p0/join/test_join.groovy
@@ -920,7 +920,7 @@ suite("test_join", "query,p0") {
     // https://github.com/apache/doris/issues/4210
     qt_join_bug3"""select * from baseall t1 where k1 = (select min(k1) from test t2 where t2.k1 = t1.k1 and t2.k2=t1.k2)
            order by k1"""
-
+    qt_join_bug4"""select b.k1 from baseall b where b.k1 not in( select k1 from baseall where k1 is not null )"""
 
 
     // basic join


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org