You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by st...@apache.org on 2024/04/24 01:40:26 UTC
(doris) branch branch-2.0 updated: [fix](nereids) do not transpose semi join agg when mark join (#33949)
This is an automated email from the ASF dual-hosted git repository.
starocean999 pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 325e18d743f [fix](nereids) do not transpose semi join agg when mark join (#33949)
325e18d743f is described below
commit 325e18d743fbf6d7867c126f513c4383eafa8d38
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Wed Apr 24 09:40:19 2024 +0800
[fix](nereids) do not transpose semi join agg when mark join (#33949)
---
.../rules/rewrite/TransposeSemiJoinAgg.java | 1 +
.../rules/rewrite/TransposeSemiJoinAggProject.java | 1 +
.../transposeJoin/transposeSemiJoinAgg.out | 89 ++++++++++++
.../transposeJoin/transposeSemiJoinAgg.groovy | 151 +++++++++++++++++++++
4 files changed, 242 insertions(+)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java
index 1a86e933a51..b0d47f9e64f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java
@@ -36,6 +36,7 @@ public class TransposeSemiJoinAgg extends OneRewriteRuleFactory {
return logicalJoin(logicalAggregate(), any())
.whenNot(join -> ConnectContext.get().getSessionVariable().isDisableJoinReorder())
.when(join -> join.getJoinType().isLeftSemiOrAntiJoin())
+ .whenNot(join -> join.isMarkJoin())
.then(join -> {
LogicalAggregate<Plan> aggregate = join.left();
if (!canTranspose(aggregate, join)) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java
index 24ca535eed8..0bbc65a1f2a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java
@@ -34,6 +34,7 @@ public class TransposeSemiJoinAggProject extends OneRewriteRuleFactory {
return logicalJoin(logicalProject(logicalAggregate()), any())
.whenNot(join -> ConnectContext.get().getSessionVariable().isDisableJoinReorder())
.when(join -> join.getJoinType().isLeftSemiOrAntiJoin())
+ .whenNot(join -> join.isMarkJoin())
.when(join -> join.left().isAllSlots())
.when(join -> join.left().getProjects().stream().allMatch(n -> n instanceof Slot))
.then(join -> {
diff --git a/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out b/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out
new file mode 100644
index 00000000000..79378dff1bf
--- /dev/null
+++ b/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out
@@ -0,0 +1,89 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !groupby_positive_case --
+PhysicalResultSink
+--hashAgg[LOCAL]
+----hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
+------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T1]
+------filter((T2.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T2]
+
+-- !groupby_negative_case --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
+----hashAgg[LOCAL]
+------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !grouping_positive_case --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalRepeat
+----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !grouping_negative_case --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalRepeat
+----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !groupby_positive_case2 --
+PhysicalResultSink
+--hashAgg[LOCAL]
+----hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
+------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T1]
+------filter((T2.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T2]
+
+-- !groupby_negative_case2 --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
+----hashAgg[LOCAL]
+------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !grouping_positive_case2 --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalRepeat
+----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !grouping_negative_case2 --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalRepeat
+----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[T1]
+----filter((T2.__DORIS_DELETE_SIGN__ = 0))
+------PhysicalOlapScan[T2]
+
+-- !groupby_negative_case3 --
+PhysicalResultSink
+--hashJoin[LEFT_SEMI_JOIN](T3.len = T3.len)
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalOlapScan[T3]
+----PhysicalOlapScan[T3]
+
diff --git a/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy b/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy
new file mode 100644
index 00000000000..3a84754bbaa
--- /dev/null
+++ b/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("transposeSemiJoinAgg") {
+ // the filter on the invisible column "__DORIS_DELETE_SIGN__ = 0" has no impact on partition pruning
+ String db = context.config.getDbNameByFile(context.file)
+ sql "use ${db}"
+ sql "SET enable_nereids_planner=true"
+ sql "set runtime_filter_mode=OFF";
+ sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
+ sql "SET enable_fallback_to_original_planner=false"
+ sql "set partition_pruning_expand_threshold=10;"
+ sql "set ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
+ sql "drop table if exists T1;"
+ sql """
+ CREATE TABLE T1 (
+ a INT NULL,
+ b INT NULL,
+ c INT NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`a`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`a`) BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ ); """
+
+ sql "drop table if exists T2;"
+ sql """
+ CREATE TABLE T2 (
+ a INT NULL,
+ b INT NULL,
+ c INT NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`a`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`a`) BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql "set enable_runtime_filter_prune=false;"
+ sql '''
+ alter table T1 modify column a set stats ('ndv'='5999989709', 'num_nulls'='0', 'row_count'='5999989709');
+ '''
+ sql '''
+ alter table T1 modify column b set stats ('ndv'='5999989709', 'num_nulls'='0', 'row_count'='5999989709');
+ '''
+ sql '''
+ alter table T2 modify column a set stats ('ndv'='100', 'num_nulls'='0', 'row_count'='100');
+ '''
+
+ sql "drop table if exists T3;"
+ sql """
+ CREATE TABLE T3 (
+ str varchar(100),
+ len int
+ ) DUPLICATE KEY(str)
+ DISTRIBUTED BY HASH(str) BUCKETS 10
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // RULE: TransposeSemiJoinAggProject
+ // 1. group-by(without grouping sets)
+ // agg-leftSemi => leftSemi-agg
+ qt_groupby_positive_case """
+ explain shape plan
+ select T3.a
+ from (select a, b, sum(c) from T1 group by a, b) T3
+ left semi join T2 on T3.a=T2.a;
+ """
+
+ // negative case: the join key T3.D is an aggregate output (sum), and the semi join stays above the agg
+ qt_groupby_negative_case """
+ explain shape plan
+ select T3.a
+ from (select a, b, sum(c) as d from T1 group by a, b) T3
+ left semi join T2 on T3.D=T2.a;
+ """
+
+ // 2. grouping sets
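+ // agg-leftSemi => leftSemi-agg (NOTE(review): the recorded plan for this case still shows the join above the agg - confirm the expectation)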
+ qt_grouping_positive_case """
+ explain shape plan
+ select T3.a
+ from (select a, b, sum(c) from T1 group by grouping sets ((a, b), (a))) T3
+ left semi join T2 on T3.a=T2.a;
+ """
+
+ // negative case: the join key T3.D is an aggregate output (sum), and the semi join stays above the agg
+ qt_grouping_negative_case """
+ explain shape plan
+ select T3.a
+ from (select a, b, sum(c) as D from T1 group by grouping sets ((a, b), (a), ())) T3
+ left semi join T2 on T3.D=T2.a;
+ """
+
+ // RULE: TransposeSemiJoinAgg
+ // 1. group-by(without grouping sets)
+ // agg-leftSemi => leftSemi-agg
+ qt_groupby_positive_case2 """
+ explain shape plan
+ select T3.a
+ from (select a from T1 group by a) T3
+ left semi join T2 on T3.a=T2.a;
+ """
+
+ // negative case: the join key T3.D is an aggregate output (sum), and the semi join stays above the agg
+ qt_groupby_negative_case2 """
+ explain shape plan
+ select T3.D
+ from (select sum(c) as D from T1 group by a) T3
+ left semi join T2 on T3.D=T2.a;
+ """
+
+ // 2. grouping sets
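+ // agg-leftSemi => leftSemi-agg (NOTE(review): the recorded plan for this case still shows the join above the agg - confirm the expectation)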
+ qt_grouping_positive_case2 """
+ explain shape plan
+ select T3.a
+ from (select a from T1 group by grouping sets ((a, b), (a))) T3
+ left semi join T2 on T3.a=T2.a;
+ """
+ // negative case: the join key T3.D is an aggregate output (sum), and the semi join stays above the agg
+ qt_grouping_negative_case2 """
+ explain shape plan
+ select T3.D
+ from (select sum(C) as D from T1 group by grouping sets ((a, b), (a), ())) T3
+ left semi join T2 on T3.D=T2.a;
+ """
+ // https://github.com/apache/doris/issues/31308
+ qt_groupby_negative_case3 """
+ explain shape plan
+ select case when len in (select len from T3) then 1 else 1 end c1 from T3 group by len;
+ """
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org