You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/22 15:54:16 UTC

[doris] branch master updated: [feature](Nereids) support syntax SELECT DISTINCT (#15197)

This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 09a22813e4 [feature](Nereids) support syntax SELECT DISTINCT (#15197)
09a22813e4 is described below

commit 09a22813e4b5240c5988473307576a5ed42a0000
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Thu Dec 22 23:54:08 2022 +0800

    [feature](Nereids) support syntax SELECT DISTINCT (#15197)
    
    Add a new rule 'ProjectWithDistinctToAggregate' to support "select distinct xx from table".
    This rule checks the logicalProject node's isDistinct property and replaces the logicalProject node with a LogicalAggregate node.
    So any rule before this one that creates a new logicalProject node should make sure the isDistinct property is correctly passed along.
    Please see rule BindSlotReference or BindFunction for an example.
---
 .../antlr4/org/apache/doris/nereids/DorisParser.g4 |  2 +-
 .../doris/nereids/jobs/batch/AnalyzeRulesJob.java  |  7 +++
 .../doris/nereids/parser/LogicalPlanBuilder.java   | 17 +++---
 .../org/apache/doris/nereids/rules/RuleType.java   |  1 +
 .../doris/nereids/rules/analysis/BindFunction.java |  2 +-
 .../nereids/rules/analysis/BindSlotReference.java  |  2 +-
 .../analysis/ProjectWithDistinctToAggregate.java   | 63 ++++++++++++++++++++++
 .../trees/plans/logical/LogicalProject.java        | 41 ++++++++++----
 .../data/query_p0/keyword/test_keyword.out         |  9 ++++
 .../suites/query_p0/keyword/test_keyword.groovy    |  7 +++
 10 files changed, 133 insertions(+), 18 deletions(-)

diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index e17a593648..939b2059c6 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -107,7 +107,7 @@ columnAliases
     ;
 
 selectClause
-    : SELECT selectHint? selectColumnClause
+    : SELECT selectHint? DISTINCT? selectColumnClause
     ;
 
 selectColumnClause
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/AnalyzeRulesJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/AnalyzeRulesJob.java
index d72837aee8..fc7fd78856 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/AnalyzeRulesJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/batch/AnalyzeRulesJob.java
@@ -25,6 +25,7 @@ import org.apache.doris.nereids.rules.analysis.CheckPolicy;
 import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots;
 import org.apache.doris.nereids.rules.analysis.NormalizeRepeat;
 import org.apache.doris.nereids.rules.analysis.ProjectToGlobalAggregate;
+import org.apache.doris.nereids.rules.analysis.ProjectWithDistinctToAggregate;
 import org.apache.doris.nereids.rules.analysis.RegisterCTE;
 import org.apache.doris.nereids.rules.analysis.ReplaceExpressionByChildOutput;
 import org.apache.doris.nereids.rules.analysis.ResolveOrdinalInOrderByAndGroupBy;
@@ -59,6 +60,12 @@ public class AnalyzeRulesJob extends BatchRulesJob {
                     new BindSlotReference(scope),
                     new BindFunction(),
                     new ProjectToGlobalAggregate(),
+                    // this rule checks the logicalProject node's isDistinct property
+                    // and replaces the logicalProject node with a LogicalAggregate node,
+                    // so any rule before this that creates a new logicalProject node
+                    // should make sure the isDistinct property is correctly passed along.
+                    // please see rule BindSlotReference or BindFunction for an example
+                    new ProjectWithDistinctToAggregate(),
                     new ResolveOrdinalInOrderByAndGroupBy(),
                     new ReplaceExpressionByChildOutput(),
                     new HideOneRowRelationUnderUnion()
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index a7f5fd915c..2801b3454d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -1145,6 +1145,11 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
             LogicalPlan filter = withFilter(inputRelation, whereClause);
             SelectColumnClauseContext selectColumnCtx = selectClause.selectColumnClause();
             LogicalPlan aggregate = withAggregate(filter, selectColumnCtx, aggClause);
+            boolean isDistinct = (selectClause.DISTINCT() != null);
+            if (isDistinct && aggregate instanceof Aggregate) {
+                throw new ParseException("cannot combine SELECT DISTINCT with aggregate functions or GROUP BY",
+                        selectClause);
+            }
             // TODO: replace and process having at this position
             if (!(aggregate instanceof Aggregate) && havingClause.isPresent()) {
                 // create a project node for pattern match of ProjectToGlobalAggregate rule
@@ -1156,15 +1161,15 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
                         throw new ParseException("only column name is supported in except clause", selectColumnCtx);
                     }
                     project = new LogicalProject<>(ImmutableList.of(new UnboundStar(Collections.emptyList())),
-                        expressions, aggregate);
+                        expressions, aggregate, isDistinct);
                 } else {
                     List<NamedExpression> projects = getNamedExpressions(selectColumnCtx.namedExpressionSeq());
-                    project = new LogicalProject<>(projects, Collections.emptyList(), aggregate);
+                    project = new LogicalProject<>(projects, Collections.emptyList(), aggregate, isDistinct);
                 }
                 return new LogicalHaving<>(getExpression((havingClause.get().booleanExpression())), project);
             } else {
                 LogicalPlan having = withHaving(aggregate, havingClause);
-                return withProjection(having, selectColumnCtx, aggClause);
+                return withProjection(having, selectColumnCtx, aggClause, isDistinct);
             }
         });
     }
@@ -1280,7 +1285,7 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
     }
 
     private LogicalPlan withProjection(LogicalPlan input, SelectColumnClauseContext selectCtx,
-                                       Optional<AggClauseContext> aggCtx) {
+                                       Optional<AggClauseContext> aggCtx, boolean isDistinct) {
         return ParserUtils.withOrigin(selectCtx, () -> {
             // TODO: skip if havingClause exists
             if (aggCtx.isPresent()) {
@@ -1292,10 +1297,10 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
                         throw new ParseException("only column name is supported in except clause", selectCtx);
                     }
                     return new LogicalProject<>(ImmutableList.of(new UnboundStar(Collections.emptyList())),
-                            expressions, input);
+                            expressions, input, isDistinct);
                 } else {
                     List<NamedExpression> projects = getNamedExpressions(selectCtx.namedExpressionSeq());
-                    return new LogicalProject<>(projects, Collections.emptyList(), input);
+                    return new LogicalProject<>(projects, Collections.emptyList(), input, isDistinct);
                 }
             }
         });
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index 30fb8b9e23..b0d3844a19 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -65,6 +65,7 @@ public enum RuleType {
     RESOLVE_PROJECT_ALIAS(RuleTypeClass.REWRITE),
     RESOLVE_AGGREGATE_ALIAS(RuleTypeClass.REWRITE),
     PROJECT_TO_GLOBAL_AGGREGATE(RuleTypeClass.REWRITE),
+    PROJECT_WITH_DISTINCT_TO_AGGREGATE(RuleTypeClass.REWRITE),
     REGISTER_CTE(RuleTypeClass.REWRITE),
 
     RELATION_AUTHENTICATION(RuleTypeClass.VALIDATION),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindFunction.java
index 61e46c7f68..0ddb802554 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindFunction.java
@@ -75,7 +75,7 @@ public class BindFunction implements AnalysisRuleFactory {
                 logicalProject().thenApply(ctx -> {
                     LogicalProject<GroupPlan> project = ctx.root;
                     List<NamedExpression> boundExpr = bind(project.getProjects(), ctx.connectContext.getEnv());
-                    return new LogicalProject<>(boundExpr, project.child());
+                    return new LogicalProject<>(boundExpr, project.child(), project.isDistinct());
                 })
             ),
             RuleType.BINDING_AGGREGATE_FUNCTION.build(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotReference.java
index 12f107c369..f7a126a411 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotReference.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSlotReference.java
@@ -111,7 +111,7 @@ public class BindSlotReference implements AnalysisRuleFactory {
                             ctx.cascadesContext);
                     List<NamedExpression> newOutput = flatBoundStar(adjustNullableForProjects(project, boundSlots));
                     newOutput.removeAll(exceptSlots);
-                    return new LogicalProject<>(newOutput, project.child());
+                    return new LogicalProject<>(newOutput, project.child(), project.isDistinct());
                 })
             ),
             RuleType.BINDING_FILTER_SLOT.build(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java
new file mode 100644
index 0000000000..1b6b270ab7
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.analysis;
+
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Analysis rule that rewrites a {@code LogicalProject} flagged as distinct into a {@link LogicalAggregate}
+ * whose group-by keys and output are both the project list. A project that is not distinct, or whose
+ * projects contain an {@link AggregateFunction}, is returned unchanged.
+ *
+ * <p>Example, for {@code select distinct value from tbl}:
+ * <pre>
+ * LogicalProject(projects=[distinct value])         LogicalAggregate(groupBy=[value], output=[value])
+ *             |                               =>                        |
+ * LogicalOlapScan(table=tbl)                        LogicalOlapScan(table=tbl)
+ * </pre>
+ */
+public class ProjectWithDistinctToAggregate extends OneAnalysisRuleFactory {
+    @Override
+    public Rule build() {
+        return RuleType.PROJECT_WITH_DISTINCT_TO_AGGREGATE.build(
+                logicalProject().then(project -> {
+                    // NOTE(review): aggregate-function projects are skipped here; presumably left to another rule.
+                    if (project.isDistinct() && project.getProjects()
+                            .stream()
+                            .noneMatch(this::hasAggregateFunction)) {
+                        // The project list is passed as both the group-by expressions and the output.
+                        return new LogicalAggregate<>(Lists.newArrayList(project.getProjects()), project.getProjects(),
+                                project.child());
+                    } else {
+                        return project;
+                    }
+                })
+        );
+    }
+
+    // Tests the expression via anyMatch for an AggregateFunction instance.
+    private boolean hasAggregateFunction(Expression expression) {
+        return expression.anyMatch(AggregateFunction.class::isInstance);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
index 0f03bc0883..705ab57e6d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java
@@ -47,17 +47,33 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
     // For project nodes under union, erasure cannot be configured, so add this flag.
     private final boolean canEliminate;
 
+    private final boolean isDistinct;
+
     public LogicalProject(List<NamedExpression> projects, CHILD_TYPE child) {
-        this(projects, Collections.emptyList(), true, child);
+        this(projects, Collections.emptyList(), true, child, false);
     }
 
     public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts, CHILD_TYPE child) {
-        this(projects, excepts, true, child);
+        this(projects, excepts, true, child, false);
     }
 
     public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts,
                           boolean canEliminate, CHILD_TYPE child) {
-        this(projects, excepts, canEliminate, Optional.empty(), Optional.empty(), child);
+        this(projects, excepts, canEliminate, Optional.empty(), Optional.empty(), child, false);
+    }
+
+    public LogicalProject(List<NamedExpression> projects, CHILD_TYPE child, boolean isDistinct) {
+        this(projects, Collections.emptyList(), true, child, isDistinct);
+    }
+
+    public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts, CHILD_TYPE child,
+                            boolean isDistinct) {
+        this(projects, excepts, true, child, isDistinct);
+    }
+
+    public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts,
+                          boolean canEliminate, CHILD_TYPE child, boolean isDistinct) {
+        this(projects, excepts, canEliminate, Optional.empty(), Optional.empty(), child, isDistinct);
     }
 
     /**
@@ -67,11 +83,12 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
      */
     public LogicalProject(List<NamedExpression> projects, List<NamedExpression> excepts, boolean canEliminate,
             Optional<GroupExpression> groupExpression, Optional<LogicalProperties> logicalProperties,
-            CHILD_TYPE child) {
+            CHILD_TYPE child, boolean isDistinct) {
         super(PlanType.LOGICAL_PROJECT, groupExpression, logicalProperties, child);
         this.projects = ImmutableList.copyOf(Objects.requireNonNull(projects, "projects can not be null"));
         this.excepts = ImmutableList.copyOf(excepts);
         this.canEliminate = canEliminate;
+        this.isDistinct = isDistinct;
     }
 
     /**
@@ -125,7 +142,8 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
         LogicalProject that = (LogicalProject) o;
         return projects.equals(that.projects)
                 && excepts.equals(that.excepts)
-                && canEliminate == that.canEliminate;
+                && canEliminate == that.canEliminate
+                && isDistinct == that.isDistinct;
     }
 
     @Override
@@ -136,18 +154,19 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
     @Override
     public LogicalUnary<Plan> withChildren(List<Plan> children) {
         Preconditions.checkArgument(children.size() == 1);
-        return new LogicalProject<>(projects, excepts, canEliminate, children.get(0));
+        return new LogicalProject<>(projects, excepts, canEliminate, children.get(0), isDistinct);
     }
 
     @Override
     public Plan withGroupExpression(Optional<GroupExpression> groupExpression) {
         return new LogicalProject<>(projects, excepts, canEliminate,
-                groupExpression, Optional.of(getLogicalProperties()), child());
+                groupExpression, Optional.of(getLogicalProperties()), child(), isDistinct);
     }
 
     @Override
     public Plan withLogicalProperties(Optional<LogicalProperties> logicalProperties) {
-        return new LogicalProject<>(projects, excepts, canEliminate, Optional.empty(), logicalProperties, child());
+        return new LogicalProject<>(projects, excepts, canEliminate, Optional.empty(), logicalProperties, child(),
+                isDistinct);
     }
 
     public boolean canEliminate() {
@@ -155,6 +174,10 @@ public class LogicalProject<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_
     }
 
     public Plan withEliminate(boolean isEliminate) {
-        return new LogicalProject<>(projects, excepts, isEliminate, child());
+        return new LogicalProject<>(projects, excepts, isEliminate, child(), isDistinct);
+    }
+
+    public boolean isDistinct() {
+        return isDistinct;
     }
 }
diff --git a/regression-test/data/query_p0/keyword/test_keyword.out b/regression-test/data/query_p0/keyword/test_keyword.out
index f369ce6a90..b5a4681ac5 100644
--- a/regression-test/data/query_p0/keyword/test_keyword.out
+++ b/regression-test/data/query_p0/keyword/test_keyword.out
@@ -642,3 +642,12 @@ true	15	1992	3021	11011920	0	true	9999-12-12	2015-04-02T00:00		3.141592653	20.45
 1	\N
 \N	2
 
+-- !distinct --
+FALSE
+TRUE
+
+-- !distinct --
+false	1	1989	1001	11011902	123.123	true	1989-03-21	1989-03-21T13:00	wangjuoo4	0.1	6.333	string12345	170141183460469231731687303715884105727
+false	2	1986	1001	11011903	1243.500	false	1901-12-31	1989-03-21T13:00	wangynnsf	20.268	789.25	string12345	-170141183460469231731687303715884105727
+false	3	1989	1002	11011905	24453.325	false	2012-03-14	2000-01-01T00:00	yunlj8@nk	78945.0	3654.0	string12345	0
+
diff --git a/regression-test/suites/query_p0/keyword/test_keyword.groovy b/regression-test/suites/query_p0/keyword/test_keyword.groovy
index ba47584d54..c30308ac88 100644
--- a/regression-test/suites/query_p0/keyword/test_keyword.groovy
+++ b/regression-test/suites/query_p0/keyword/test_keyword.groovy
@@ -109,4 +109,11 @@ suite("test_keyword", "query,p0") {
     try_sql "select k1 as k7, k2 as k8, k3 as k9 from baseall t group by k7, k8, k9 having k7 > 5 \
             order by k7;"
     try_sql "select k1 as k7, k2 as k8, k3 as k9 from baseall t where k8 > 0 group by k7, k8, k9 having k7 > 5 order by k7;"
+
+    sql 'set enable_vectorized_engine=true'
+    sql 'set enable_nereids_planner=true'
+    sql 'set enable_fallback_to_original_planner=false'
+
+    qt_distinct "select distinct upper(k6) from ${tableName1} order by upper(k6)"
+    qt_distinct "select distinct * from ${tableName1} where k1<20 order by k1, k2, k3, k4"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org