You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by st...@apache.org on 2024/04/24 01:40:26 UTC

(doris) branch branch-2.0 updated: [fix](nereids) do not transpose semi join agg when mark join (#33949)

This is an automated email from the ASF dual-hosted git repository.

starocean999 pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 325e18d743f [fix](nereids) do not transpose semi join agg when mark join (#33949)
325e18d743f is described below

commit 325e18d743fbf6d7867c126f513c4383eafa8d38
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Wed Apr 24 09:40:19 2024 +0800

    [fix](nereids) do not transpose semi join agg when mark join (#33949)
---
 .../rules/rewrite/TransposeSemiJoinAgg.java        |   1 +
 .../rules/rewrite/TransposeSemiJoinAggProject.java |   1 +
 .../transposeJoin/transposeSemiJoinAgg.out         |  89 ++++++++++++
 .../transposeJoin/transposeSemiJoinAgg.groovy      | 151 +++++++++++++++++++++
 4 files changed, 242 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java
index 1a86e933a51..b0d47f9e64f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java
@@ -36,6 +36,7 @@ public class TransposeSemiJoinAgg extends OneRewriteRuleFactory {
         return logicalJoin(logicalAggregate(), any())
                 .whenNot(join -> ConnectContext.get().getSessionVariable().isDisableJoinReorder())
                 .when(join -> join.getJoinType().isLeftSemiOrAntiJoin())
+                .whenNot(join -> join.isMarkJoin())
                 .then(join -> {
                     LogicalAggregate<Plan> aggregate = join.left();
                     if (!canTranspose(aggregate, join)) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java
index 24ca535eed8..0bbc65a1f2a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java
@@ -34,6 +34,7 @@ public class TransposeSemiJoinAggProject extends OneRewriteRuleFactory {
         return logicalJoin(logicalProject(logicalAggregate()), any())
                 .whenNot(join -> ConnectContext.get().getSessionVariable().isDisableJoinReorder())
                 .when(join -> join.getJoinType().isLeftSemiOrAntiJoin())
+                .whenNot(join -> join.isMarkJoin())
                 .when(join -> join.left().isAllSlots())
                 .when(join -> join.left().getProjects().stream().allMatch(n -> n instanceof Slot))
                 .then(join -> {
diff --git a/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out b/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out
new file mode 100644
index 00000000000..79378dff1bf
--- /dev/null
+++ b/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out
@@ -0,0 +1,89 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !groupby_positive_case --
+PhysicalResultSink
+--hashAgg[LOCAL]
+----hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
+------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T1]
+------filter((T2.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T2]
+
+-- !groupby_negative_case --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
+----hashAgg[LOCAL]
+------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !grouping_positive_case --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalRepeat
+----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !grouping_negative_case --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalRepeat
+----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !groupby_positive_case2 --
+PhysicalResultSink
+--hashAgg[LOCAL]
+----hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
+------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T1]
+------filter((T2.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T2]
+
+-- !groupby_negative_case2 --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
+----hashAgg[LOCAL]
+------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !grouping_positive_case2 --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalRepeat
+----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !grouping_negative_case2 --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalRepeat
+----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !groupby_negative_case3 --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.len = T3.len)
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalOlapScan[T3]
+----PhysicalOlapScan[T3]
+
diff --git a/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy b/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy
new file mode 100644
index 00000000000..3a84754bbaa
--- /dev/null
+++ b/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("transposeSemiJoinAgg") {
+    // The filter on the invisible column "__DORIS_DELETE_SIGN__ = 0" has no impact on partition pruning
+    String db = context.config.getDbNameByFile(context.file)
+    sql "use ${db}"
+    sql "SET enable_nereids_planner=true"
+    sql "set runtime_filter_mode=OFF";
+    sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
+    sql "SET enable_fallback_to_original_planner=false"
+    sql "set partition_pruning_expand_threshold=10;"
+    sql "set ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
+    sql "drop table if exists T1;"
+    sql """
+        CREATE TABLE T1 (
+        a INT NULL,
+        b INT NULL,
+        c INT NULL
+        ) ENGINE=OLAP
+        UNIQUE KEY(`a`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`a`) BUCKETS 10
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        ); """
+    
+    sql "drop table if exists T2;"
+    sql """
+        CREATE TABLE T2 (
+        a INT NULL,
+        b INT NULL,
+        c INT NULL
+        ) ENGINE=OLAP
+        UNIQUE KEY(`a`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`a`) BUCKETS 10
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );
+        """
+
+    sql "set enable_runtime_filter_prune=false;"
+    sql '''
+    alter table T1 modify column a set stats ('ndv'='5999989709', 'num_nulls'='0', 'row_count'='5999989709');
+    '''
+    sql '''
+    alter table T1 modify column b set stats ('ndv'='5999989709', 'num_nulls'='0', 'row_count'='5999989709');
+    '''
+    sql '''
+    alter table T2 modify column a set stats ('ndv'='100', 'num_nulls'='0', 'row_count'='100');
+    '''
+
+    sql "drop table if exists T3;"
+    sql """
+        CREATE TABLE T3 (
+        str varchar(100),
+        len  int
+        ) DUPLICATE KEY(str)
+        DISTRIBUTED BY HASH(str) BUCKETS 10
+        PROPERTIES("replication_num" = "1");
+        """
+
+    // RULE: TransposeSemiJoinAggProject
+    // 1. group-by (without grouping sets)
+    // agg-leftSemi => leftSemi-agg
+    qt_groupby_positive_case """
+        explain shape plan
+        select T3.a
+        from (select a, b, sum(c) from T1 group by a, b) T3
+        left semi join T2 on T3.a=T2.a;
+    """
+
+    // agg-leftSemi kept: the join key T3.D is the aggregate output sum(c), and the expected plan leaves the semi join above the agg
+    qt_groupby_negative_case """
+        explain shape plan
+        select T3.a
+        from (select a, b, sum(c) as d from T1 group by a, b) T3
+        left semi join T2 on T3.D=T2.a;
+        """
+
+    // 2. grouping sets
+    // agg-leftSemi => leftSemi-agg
+    qt_grouping_positive_case """
+        explain shape plan
+        select T3.a
+        from (select a, b, sum(c) from T1 group by grouping sets ((a, b), (a))) T3
+        left semi join T2 on T3.a=T2.a;
+    """
+
+    // agg-leftSemi kept: the join key T3.D is the aggregate output sum(c), and the expected plan leaves the semi join above the agg
+    qt_grouping_negative_case """
+        explain shape plan
+        select T3.a
+        from (select a, b, sum(c) as D from T1 group by grouping sets ((a, b), (a), ())) T3
+        left semi join T2 on T3.D=T2.a;
+    """
+
+    // RULE: TransposeSemiJoinAgg
+    // 1. group-by (without grouping sets)
+    // agg-leftSemi => leftSemi-agg
+    qt_groupby_positive_case2 """
+        explain shape plan
+        select T3.a
+        from (select a from T1 group by a) T3
+        left semi join T2 on T3.a=T2.a;
+    """
+
+    // agg-leftSemi kept: the join key T3.D is the aggregate output sum(c), and the expected plan leaves the semi join above the agg
+    qt_groupby_negative_case2 """
+        explain shape plan
+        select T3.D
+        from (select sum(c) as D from T1 group by a) T3
+        left semi join T2 on T3.D=T2.a;
+        """
+
+    // 2. grouping sets
+    // agg-leftSemi => leftSemi-agg
+    qt_grouping_positive_case2 """
+        explain shape plan
+        select T3.a
+        from (select a from T1 group by grouping sets ((a, b), (a))) T3
+        left semi join T2 on T3.a=T2.a;
+    """
+    // agg-leftSemi kept: the join key T3.D is the aggregate output sum(C), and the expected plan leaves the semi join above the agg
+    qt_grouping_negative_case2 """
+        explain shape plan
+        select T3.D
+        from (select sum(C) as D from T1 group by grouping sets ((a, b), (a), ())) T3
+        left semi join T2 on T3.D=T2.a;
+        """
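+    // regression case for the mark-join fix (IN-subquery inside CASE WHEN; semi join must stay above the agg): https://github.com/apache/doris/issues/31308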
+    qt_groupby_negative_case3 """
+        explain shape plan
+        select case when len in (select len from T3) then 1 else 1 end c1 from T3 group by len;
+        """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org