You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ja...@apache.org on 2023/06/29 14:04:48 UTC
[doris] branch master updated: [feature](Nereids): infer distinct from SetOperator (#21235)
This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9756ff1e25 [feature](Nereids): infer distinct from SetOperator (#21235)
9756ff1e25 is described below
commit 9756ff1e259fae3f29143f55e01aac549a73f612
Author: jakevin <ja...@gmail.com>
AuthorDate: Thu Jun 29 22:04:41 2023 +0800
[feature](Nereids): infer distinct from SetOperator (#21235)
Infer distinct from a DISTINCT set operator, and place a distinct aggregate above each of its children to reduce the amount of data the set operator has to process.
tpcds_sf100 q14:
before
100 rows in set (7.60 sec)
after
100 rows in set (6.80 sec)
---
.../doris/nereids/jobs/executor/Rewriter.java | 2 +
.../org/apache/doris/nereids/rules/RuleType.java | 1 +
.../analysis/ProjectWithDistinctToAggregate.java | 36 ++++-----
.../rules/rewrite/InferSetOperatorDistinct.java | 54 ++++++++++++++
.../trees/plans/logical/LogicalAggregate.java | 7 ++
.../nereids_tpcds_shape_sf100_p0/shape/query14.out | 86 ++++++++++++----------
6 files changed, 125 insertions(+), 61 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index f7bd7c1a5a..ba3d71d896 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -61,6 +61,7 @@ import org.apache.doris.nereids.rules.rewrite.InferAggNotNull;
import org.apache.doris.nereids.rules.rewrite.InferFilterNotNull;
import org.apache.doris.nereids.rules.rewrite.InferJoinNotNull;
import org.apache.doris.nereids.rules.rewrite.InferPredicates;
+import org.apache.doris.nereids.rules.rewrite.InferSetOperatorDistinct;
import org.apache.doris.nereids.rules.rewrite.InlineCTE;
import org.apache.doris.nereids.rules.rewrite.MergeFilters;
import org.apache.doris.nereids.rules.rewrite.MergeOneRowRelationIntoUnion;
@@ -232,6 +233,7 @@ public class Rewriter extends AbstractBatchJobExecutor {
bottomUp(new MergeSetOperations()),
bottomUp(new PushProjectIntoOneRowRelation()),
topDown(new MergeOneRowRelationIntoUnion()),
+ costBased(topDown(new InferSetOperatorDistinct())),
topDown(new BuildAggForUnion())
),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index bf95f4468e..b7e75c3659 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -107,6 +107,7 @@ public enum RuleType {
ELIMINATE_HINT(RuleTypeClass.REWRITE),
INFER_PREDICATES(RuleTypeClass.REWRITE),
INFER_AGG_NOT_NULL(RuleTypeClass.REWRITE),
+ INFER_SET_OPERATOR_DISTINCT(RuleTypeClass.REWRITE),
INFER_FILTER_NOT_NULL(RuleTypeClass.REWRITE),
INFER_JOIN_NOT_NULL(RuleTypeClass.REWRITE),
// subquery analyze
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java
index 1b6b270ab7..230eef9e1f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java
@@ -22,38 +22,32 @@ import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
-
-import com.google.common.collect.Lists;
+import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
/**
* ProjectWithDistinctToAggregate.
- *
+ * <p>
* example sql:
* <pre>
- * select distinct value
- * from tbl
- * </pre>
- *
- * origin plan: transformed plan:
+ * select distinct value from tbl
*
- * LogicalProject(projects=[distinct value]) LogicalAggregate(groupBy=[value], output=[value])
- * | => |
- * LogicalOlapScan(table=tbl) LogicalOlapScan(table=tbl)
+ * LogicalProject(projects=[distinct value])
+ * |
+ * LogicalOlapScan(table=tbl)
+ * =>
+ * LogicalAggregate(groupBy=[value], output=[value])
+ * |
+ * LogicalOlapScan(table=tbl)
+ * </pre>
*/
public class ProjectWithDistinctToAggregate extends OneAnalysisRuleFactory {
@Override
public Rule build() {
return RuleType.PROJECT_WITH_DISTINCT_TO_AGGREGATE.build(
- logicalProject().then(project -> {
- if (project.isDistinct() && project.getProjects()
- .stream()
- .noneMatch(this::hasAggregateFunction)) {
- return new LogicalAggregate<>(Lists.newArrayList(project.getProjects()), project.getProjects(),
- project.child());
- } else {
- return project;
- }
- })
+ logicalProject()
+ .when(LogicalProject::isDistinct)
+ .whenNot(project -> project.getProjects().stream().anyMatch(this::hasAggregateFunction))
+ .then(project -> new LogicalAggregate<>(project.getProjects(), project.child()))
);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferSetOperatorDistinct.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferSetOperatorDistinct.java
new file mode 100644
index 0000000000..c77edb2e17
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferSetOperatorDistinct.java
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.algebra.SetOperation.Qualifier;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Infer Distinct from SetOperator;
+ * Example:
+ * <pre>
+ * Intersect
+ * Intersect -> |
+ * Agg for Distinct
+ * </pre>
+ */
+public class InferSetOperatorDistinct extends OneRewriteRuleFactory {
+ @Override
+ public Rule build() {
+ return logicalSetOperation()
+ .when(operation -> operation.getQualifier() == Qualifier.DISTINCT)
+ .then(setOperation -> {
+ List<Plan> newChildren = setOperation.children().stream()
+ .map(child -> new LogicalAggregate<>(ImmutableList.copyOf(child.getOutput()), child))
+ .collect(ImmutableList.toImmutableList());
+ if (newChildren.equals(setOperation.children())) {
+ return null;
+ }
+ return setOperation.withChildren(newChildren);
+ }).toRule(RuleType.INFER_SET_OPERATOR_DISTINCT);
+ }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java
index e9926d4d92..d6136a0812 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java
@@ -76,6 +76,13 @@ public class LogicalAggregate<CHILD_TYPE extends Plan>
false, Optional.empty(), child);
}
+ /**
+ * Distinct Agg
+ */
+ public LogicalAggregate(List<NamedExpression> namedExpressions, CHILD_TYPE child) {
+ this(ImmutableList.copyOf(namedExpressions), namedExpressions, false, Optional.empty(), child);
+ }
+
public LogicalAggregate(List<Expression> groupByExpressions,
List<NamedExpression> outputExpressions, boolean ordinalIsResolved, CHILD_TYPE child) {
this(groupByExpressions, outputExpressions, false, ordinalIsResolved, Optional.empty(),
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out
index 4877f71f8c..ecf6a69c87 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query14.out
@@ -4,49 +4,55 @@ CteAnchor[cteId= ( CTEId#8=] )
--CteProducer[cteId= ( CTEId#8=] )
----PhysicalProject
------hashJoin[INNER_JOIN](item.i_brand_id = t.brand_id)(item.i_class_id = t.class_id)(item.i_category_id = t.category_id)
---------PhysicalIntersect
-----------PhysicalDistribute
-------------PhysicalProject
---------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = iss.i_item_sk)
-----------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = d1.d_date_sk)
-------------------PhysicalProject
---------------------PhysicalOlapScan[store_sales]
-------------------PhysicalDistribute
---------------------PhysicalProject
-----------------------filter((d1.d_year <= 2002)(d1.d_year >= 2000))
-------------------------PhysicalOlapScan[date_dim]
-----------------PhysicalDistribute
-------------------PhysicalProject
---------------------PhysicalOlapScan[item]
-----------PhysicalDistribute
-------------PhysicalProject
---------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = ics.i_item_sk)
-----------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = d2.d_date_sk)
-------------------PhysicalProject
---------------------PhysicalOlapScan[catalog_sales]
-------------------PhysicalDistribute
---------------------PhysicalProject
-----------------------filter((d2.d_year >= 2000)(d2.d_year <= 2002))
-------------------------PhysicalOlapScan[date_dim]
-----------------PhysicalDistribute
+--------PhysicalProject
+----------PhysicalOlapScan[item]
+--------PhysicalDistribute
+----------PhysicalIntersect
+------------hashAgg[GLOBAL]
+--------------PhysicalDistribute
+----------------hashAgg[LOCAL]
------------------PhysicalProject
---------------------PhysicalOlapScan[item]
-----------PhysicalDistribute
-------------PhysicalProject
---------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = iws.i_item_sk)
-----------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = d3.d_date_sk)
+--------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = iss.i_item_sk)
+----------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = d1.d_date_sk)
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[store_sales]
+------------------------PhysicalDistribute
+--------------------------PhysicalProject
+----------------------------filter((d1.d_year <= 2002)(d1.d_year >= 2000))
+------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[item]
+------------hashAgg[GLOBAL]
+--------------PhysicalDistribute
+----------------hashAgg[LOCAL]
------------------PhysicalProject
---------------------PhysicalOlapScan[web_sales]
-------------------PhysicalDistribute
---------------------PhysicalProject
-----------------------filter((d3.d_year <= 2002)(d3.d_year >= 2000))
-------------------------PhysicalOlapScan[date_dim]
-----------------PhysicalDistribute
+--------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk = ics.i_item_sk)
+----------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = d2.d_date_sk)
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[catalog_sales]
+------------------------PhysicalDistribute
+--------------------------PhysicalProject
+----------------------------filter((d2.d_year >= 2000)(d2.d_year <= 2002))
+------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[item]
+------------hashAgg[GLOBAL]
+--------------PhysicalDistribute
+----------------hashAgg[LOCAL]
------------------PhysicalProject
---------------------PhysicalOlapScan[item]
---------PhysicalDistribute
-----------PhysicalProject
-------------PhysicalOlapScan[item]
+--------------------hashJoin[INNER_JOIN](web_sales.ws_item_sk = iws.i_item_sk)
+----------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = d3.d_date_sk)
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[web_sales]
+------------------------PhysicalDistribute
+--------------------------PhysicalProject
+----------------------------filter((d3.d_year <= 2002)(d3.d_year >= 2000))
+------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------PhysicalOlapScan[item]
--CteAnchor[cteId= ( CTEId#10=] )
----CteProducer[cteId= ( CTEId#10=] )
------hashAgg[GLOBAL]
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org