You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/22 15:54:16 UTC
[doris] branch master updated: [feature](Nereids) support syntax SELECT DISTINCT (#15197)
This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 09a22813e4 [feature](Nereids) support syntax SELECT DISTINCT (#15197)
09a22813e4 is described below
commit 09a22813e4b5240c5988473307576a5ed42a0000
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Thu Dec 22 23:54:08 2022 +0800
[feature](Nereids) support syntax SELECT DISTINCT (#15197)
Add a new rule 'ProjectWithDistinctToAggregate' to support "select distinct xx from table".
This rule checks the LogicalProject node's isDistinct property and replaces the LogicalProject node with a LogicalAggregate node.
So any rule that runs before this one and creates a new LogicalProject node must make sure the isDistinct property is passed along correctly.
Please see the rules BindSlotReference or BindFunction for an example.
---
.../antlr4/org/apache/doris/nereids/DorisParser.g4 | 2 +-
.../doris/nereids/jobs/batch/AnalyzeRulesJob.java | 7 +++
.../doris/nereids/parser/LogicalPlanBuilder.java | 17 +++---
.../org/apache/doris/nereids/rules/RuleType.java | 1 +
.../doris/nereids/rules/analysis/BindFunction.java | 2 +-
.../nereids/rules/analysis/BindSlotReference.java | 2 +-
.../analysis/ProjectWithDistinctToAggregate.java | 63 ++++++++++++++++++++++
.../trees/plans/logical/LogicalProject.java | 41 ++++++++++----
.../data/query_p0/keyword/test_keyword.out | 9 ++++
.../suites/query_p0/keyword/test_keyword.groovy | 7 +++
10 files changed, 133 insertions(+), 18 deletions(-)
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index e17a593648..939b2059c6 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -107,7 +107,7 @@ columnAliases
;
selectClause
- : SELECT selectHint? selectColumnClause
+ : SELECT selectHint? DISTINCT? selectColumnClause
;
selectColumnClause
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/AnalyzeRulesJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/AnalyzeRulesJob.java
index d72837aee8..fc7fd78856 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/AnalyzeRulesJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/AnalyzeRulesJob.java
@@ -25,6 +25,7 @@ import org.apache.doris.nereids.rules.analysis.CheckPolicy;
import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots;
import org.apache.doris.nereids.rules.analysis.NormalizeRepeat;
import org.apache.doris.nereids.rules.analysis.ProjectToGlobalAggregate;
+import org.apache.doris.nereids.rules.analysis.ProjectWithDistinctToAggregate;
import org.apache.doris.nereids.rules.analysis.RegisterCTE;
import org.apache.doris.nereids.rules.analysis.ReplaceExpressionByChildOutput;
import org.apache.doris.nereids.rules.analysis.ResolveOrdinalInOrderByAndGroupBy;
@@ -59,6 +60,12 @@ public class AnalyzeRulesJob extends BatchRulesJob {
new BindSlotReference(scope),
new BindFunction(),
new ProjectToGlobalAggregate(),
+ // this rule checks the LogicalProject node's isDistinct property
+ // and replaces the LogicalProject node with a LogicalAggregate node,
+ // so any rule that runs before this one and creates a new LogicalProject node
+ // must make sure the isDistinct property is passed along correctly.
+ // please see rule BindSlotReference or BindFunction for an example
+ new ProjectWithDistinctToAggregate(),
new ResolveOrdinalInOrderByAndGroupBy(),
new ReplaceExpressionByChildOutput(),
new HideOneRowRelationUnderUnion()
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index a7f5fd915c..2801b3454d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -1145,6 +1145,11 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
LogicalPlan filter = withFilter(inputRelation, whereClause);
SelectColumnClauseContext selectColumnCtx = selectClause.selectColumnClause();
LogicalPlan aggregate = withAggregate(filter, selectColumnCtx, aggClause);
+ boolean isDistinct = (selectClause.DISTINCT() != null);
+ if (isDistinct && aggregate instanceof Aggregate) {
+ throw new ParseException("cannot combine SELECT DISTINCT with aggregate functions or GROUP BY",
+ selectClause);
+ }
// TODO: replace and process having at this position
if (!(aggregate instanceof Aggregate) && havingClause.isPresent()) {
// create a project node for pattern match of ProjectToGlobalAggregate rule
@@ -1156,15 +1161,15 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
throw new ParseException("only column name is supported in except clause", selectColumnCtx);
}
project = new LogicalProject<>(ImmutableList.of(new UnboundStar(Collections.emptyList())),
- expressions, aggregate);
+ expressions, aggregate, isDistinct);
} else {
List<NamedExpression> projects = getNamedExpressions(selectColumnCtx.namedExpressionSeq());
- project = new LogicalProject<>(projects, Collections.emptyList(), aggregate);
+ project = new LogicalProject<>(projects, Collections.emptyList(), aggregate, isDistinct);
}
return new LogicalHaving<>(getExpression((havingClause.get().booleanExpression())), project);
} else {
LogicalPlan having = withHaving(aggregate, havingClause);
- return withProjection(having, selectColumnCtx, aggClause);
+ return withProjection(having, selectColumnCtx, aggClause, isDistinct);
}
});
}
@@ -1280,7 +1285,7 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
}
private LogicalPlan withProjection(LogicalPlan input, SelectColumnClauseContext selectCtx,
- Optional<AggClauseContext> aggCtx) {
+ Optional<AggClauseContext> aggCtx, boolean isDistinct) {
return ParserUtils.withOrigin(selectCtx, () -> {
// TODO: skip if havingClause exists
if (aggCtx.isPresent()) {
@@ -1292,10 +1297,10 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
throw new ParseException("only column name is supported in except clause", selectCtx);
}
return new LogicalProject<>(ImmutableList.of(new UnboundStar(Collections.emptyList())),
- expressions, input);
+ expressions, input, isDistinct);
} else {
List<NamedExpression> projects = getNamedExpressions(selectCtx.namedExpressionSeq());
- return new LogicalProject<>(projects, Collections.emptyList(), input);
+ return new LogicalProject<>(projects, Collections.emptyList(), input, isDistinct);
}
}
});
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index 30fb8b9e23..b0d3844a19 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -65,6 +65,7 @@ public enum RuleType {
RESOLVE_PROJECT_ALIAS(RuleTypeClass.REWRITE),
RESOLVE_AGGREGATE_ALIAS(RuleTypeClass.REWRITE),
PROJECT_TO_GLOBAL_AGGREGATE(RuleTypeClass.REWRITE),
+ PROJECT_WITH_DISTINCT_TO_AGGREGATE(RuleTypeClass.REWRITE),
REGISTER_CTE(RuleTypeClass.REWRITE),
RELATION_AUTHENTICATION(RuleTypeClass.VALIDATION),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindFunction.java
index 61e46c7f68..0ddb802554 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindFunction.java
@@ -75,7 +75,7 @@ public class BindFunction implements AnalysisRuleFactory {
logicalProject().thenApply(ctx -> {
LogicalProject<GroupPlan> project = ctx.root;
List<NamedExpression> boundExpr = bind(project.getProjects(), ctx.connectContext.getEnv());
- return new LogicalProject<>(boundExpr, project.child());
+ return new LogicalProject<>(boundExpr, project.child(), project.isDistinct());
})
),
RuleType.BINDING_AGGREGATE_FUNCTION.build(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotReference.java
index 12f107c369..f7a126a411 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotReference.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotReference.java
@@ -111,7 +111,7 @@ public class BindSlotReference implements AnalysisRuleFactory {
ctx.cascadesContext);
List<NamedExpression> newOutput = flatBoundStar(adjustNullableForProjects(project, boundSlots));
newOutput.removeAll(exceptSlots);
- return new LogicalProject<>(newOutput, project.child());
+ return new LogicalProject<>(newOutput, project.child(), project.isDistinct());
})
),
RuleType.BINDING_FILTER_SLOT.build(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java
new file mode 100644
index 0000000000..1b6b270ab7
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.analysis;
+
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Rewrites a LogicalProject whose isDistinct flag is set into a LogicalAggregate built from its project list.
+ * A project that is not distinct, or whose projects contain an aggregate function, is returned unchanged.
+ * example sql:
+ * <pre>
+ * select distinct value
+ * from tbl
+ * </pre>
+ *
+ * original plan: transformed plan:
+ *
+ * LogicalProject(projects=[distinct value]) LogicalAggregate(groupBy=[value], output=[value])
+ * | => |
+ * LogicalOlapScan(table=tbl) LogicalOlapScan(table=tbl)
+ */
+public class ProjectWithDistinctToAggregate extends OneAnalysisRuleFactory {
+ @Override
+ public Rule build() {
+ return RuleType.PROJECT_WITH_DISTINCT_TO_AGGREGATE.build(
+ logicalProject().then(project -> {
+ if (project.isDistinct() && project.getProjects()
+ .stream()
+ .noneMatch(this::hasAggregateFunction)) {
+ return new LogicalAggregate<>(Lists.newArrayList(project.getProjects()), project.getProjects(),
+ project.child());
+ } else {
+ return project;
+ }
+ })
+ );
+ }
+
+ private boolean hasAggregateFunction(Expression expression) {
+ return expression.anyMatch(AggregateFunction.class::isInstance);
+ }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
index 0f03bc0883..705ab57e6d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
@@ -47,17 +47,33 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
// For project nodes under union, erasure cannot be configured, so add this flag.
private final boolean canEliminate;
+ private final boolean isDistinct;
+
public LogicalProject(List<NamedExpression> projects, CHILD_TYPE child) {
- this(projects, Collections.emptyList(), true, child);
+ this(projects, Collections.emptyList(), true, child, false);
}
public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts, CHILD_TYPE child) {
- this(projects, excepts, true, child);
+ this(projects, excepts, true, child, false);
}
public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts,
boolean canEliminate, CHILD_TYPE child) {
- this(projects, excepts, canEliminate, Optional.empty(), Optional.empty(), child);
+ this(projects, excepts, canEliminate, Optional.empty(), Optional.empty(), child, false);
+ }
+
+ public LogicalProject(List<NamedExpression> projects, CHILD_TYPE child, boolean isDistinct) {
+ this(projects, Collections.emptyList(), true, child, isDistinct);
+ }
+
+ public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts, CHILD_TYPE child,
+ boolean isDistinct) {
+ this(projects, excepts, true, child, isDistinct);
+ }
+
+ public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts,
+ boolean canEliminate, CHILD_TYPE child, boolean isDistinct) {
+ this(projects, excepts, canEliminate, Optional.empty(), Optional.empty(), child, isDistinct);
}
/**
@@ -67,11 +83,12 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
*/
public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts, boolean canEliminate,
Optional<GroupExpression> groupExpression, Optional<LogicalProperties> logicalProperties,
- CHILD_TYPE child) {
+ CHILD_TYPE child, boolean isDistinct) {
super(PlanType.LOGICAL_PROJECT, groupExpression, logicalProperties, child);
this.projects = ImmutableList.copyOf(Objects.requireNonNull(projects, "projects can not be null"));
this.excepts = ImmutableList.copyOf(excepts);
this.canEliminate = canEliminate;
+ this.isDistinct = isDistinct;
}
/**
@@ -125,7 +142,8 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
LogicalProject that = (LogicalProject) o;
return projects.equals(that.projects)
&& excepts.equals(that.excepts)
- && canEliminate == that.canEliminate;
+ && canEliminate == that.canEliminate
+ && isDistinct == that.isDistinct;
}
@Override
@@ -136,18 +154,19 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
@Override
public LogicalUnary<Plan> withChildren(List<Plan> children) {
Preconditions.checkArgument(children.size() == 1);
- return new LogicalProject<>(projects, excepts, canEliminate, children.get(0));
+ return new LogicalProject<>(projects, excepts, canEliminate, children.get(0), isDistinct);
}
@Override
public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
return new LogicalProject<>(projects, excepts, canEliminate,
- groupExpression, Optional.of(getLogicalProperties()), child());
+ groupExpression, Optional.of(getLogicalProperties()), child(), isDistinct);
}
@Override
public Plan withLogicalProperties(Optional<LogicalProperties> logicalProperties) {
- return new LogicalProject<>(projects, excepts, canEliminate, Optional.empty(), logicalProperties, child());
+ return new LogicalProject<>(projects, excepts, canEliminate, Optional.empty(), logicalProperties, child(),
+ isDistinct);
}
public boolean canEliminate() {
@@ -155,6 +174,10 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
}
public Plan withEliminate(boolean isEliminate) {
- return new LogicalProject<>(projects, excepts, isEliminate, child());
+ return new LogicalProject<>(projects, excepts, isEliminate, child(), isDistinct);
+ }
+
+ public boolean isDistinct() {
+ return isDistinct;
}
}
diff --git a/regression-test/data/query_p0/keyword/test_keyword.out b/regression-test/data/query_p0/keyword/test_keyword.out
index f369ce6a90..b5a4681ac5 100644
--- a/regression-test/data/query_p0/keyword/test_keyword.out
+++ b/regression-test/data/query_p0/keyword/test_keyword.out
@@ -642,3 +642,12 @@ true 15 1992 3021 11011920 0 true 9999-12-12 2015-04-02T00:00 3.141592653 20.45
1 \N
\N 2
+-- !distinct --
+FALSE
+TRUE
+
+-- !distinct --
+false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727
+false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727
+false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 78945.0 3654.0 string12345 0
+
diff --git a/regression-test/suites/query_p0/keyword/test_keyword.groovy b/regression-test/suites/query_p0/keyword/test_keyword.groovy
index ba47584d54..c30308ac88 100644
--- a/regression-test/suites/query_p0/keyword/test_keyword.groovy
+++ b/regression-test/suites/query_p0/keyword/test_keyword.groovy
@@ -109,4 +109,11 @@ suite("test_keyword", "query,p0") {
try_sql "select k1 as k7, k2 as k8, k3 as k9 from baseall t group by k7, k8, k9 having k7 > 5 \
order by k7;"
try_sql "select k1 as k7, k2 as k8, k3 as k9 from baseall t where k8 > 0 group by k7, k8, k9 having k7 > 5 order by k7;"
+
+ sql 'set enable_vectorized_engine=true'
+ sql 'set enable_nereids_planner=true'
+ sql 'set enable_fallback_to_original_planner=false'
+
+ qt_distinct "select distinct upper(k6) from ${tableName1} order by upper(k6)"
+ qt_distinct "select distinct * from ${tableName1} where k1<20 order by k1, k2, k3, k4"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org