You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/31 15:46:37 UTC

[doris] branch master updated: [Enhancement](grouping) Add a switch for users to force using alias name in group by and having clause (#15748)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new cd457312e4 [Enhancement](grouping) Add a switch for users to force using alias name in group by and having clause (#15748)
cd457312e4 is described below

commit cd457312e4fd5964de9415f8c0acb64d0cd0f362
Author: xy720 <22...@users.noreply.github.com>
AuthorDate: Tue Jan 31 23:46:31 2023 +0800

    [Enhancement](grouping) Add a switch for users to force using alias name in group by and having clause (#15748)
---
 docs/en/docs/advanced/variables.md                 |  3 ++
 docs/zh-CN/docs/advanced/variables.md              |  4 ++
 .../java/org/apache/doris/analysis/SelectStmt.java | 51 ++++++++++++++--------
 .../java/org/apache/doris/qe/SessionVariable.java  | 11 +++++
 .../correctness_p0/test_group_having_alias.out     | 10 +++++
 .../correctness_p0/test_group_having_alias.groovy  | 35 +++++++++++++++
 6 files changed, 97 insertions(+), 17 deletions(-)

diff --git a/docs/en/docs/advanced/variables.md b/docs/en/docs/advanced/variables.md
index d23825f722..050bcfd869 100644
--- a/docs/en/docs/advanced/variables.md
+++ b/docs/en/docs/advanced/variables.md
@@ -564,3 +564,6 @@ Translated with www.DeepL.com/Translator (free version)
 
     The default threshold of rewriting OR to IN. The default value is 2, which means that when there are 2 ORs, if they can be compact, they will be rewritten as IN predicate.
 
+*   `group_by_and_having_use_alias_first`
+
+    Specifies whether group by and having clauses use column aliases rather than searching for column name in From clause. The default value is false.
diff --git a/docs/zh-CN/docs/advanced/variables.md b/docs/zh-CN/docs/advanced/variables.md
index d4f976314c..5c5c1530d2 100644
--- a/docs/zh-CN/docs/advanced/variables.md
+++ b/docs/zh-CN/docs/advanced/variables.md
@@ -551,4 +551,8 @@ SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3);
 * `rewrite_or_to_in_predicate_threshold`
 
     默认的改写OR to IN的OR数量阈值。默认值为2,即表示有2个OR的时候,如果可以合并,则会改写成IN。
+
+*   `group_by_and_having_use_alias_first`
+
+       指定group by和having语句是否优先使用列的别名,而非从From语句里寻找列的名字。默认为false。
 	
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index 55f8a8824e..3f5950f5f7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -1135,23 +1135,32 @@ public class SelectStmt extends QueryStmt {
              *     select id, floor(v1) v, sum(v2) vsum from table group by id,v having(v>1 AND vsum>1);
              */
             if (groupByClause != null) {
-                ExprSubstitutionMap excludeAliasSMap = aliasSMap.clone();
-                List<Expr> havingSlots = Lists.newArrayList();
-                havingClause.collect(SlotRef.class, havingSlots);
-                for (Expr expr : havingSlots) {
-                    if (excludeAliasSMap.get(expr) == null) {
-                        continue;
-                    }
-                    try {
-                        // try to use column name firstly
-                        expr.clone().analyze(analyzer);
-                        // analyze success means column name exist, do not use alias name
-                        excludeAliasSMap.removeByLhsExpr(expr);
-                    } catch (AnalysisException ex) {
-                        // according to case3, column name do not exist, keep alias name inside alias map
+                boolean aliasFirst = false;
+                if (analyzer.getContext() != null) {
+                    aliasFirst = analyzer.getContext().getSessionVariable().isGroupByAndHavingUseAliasFirst();
+                }
+                if (!aliasFirst) {
+                    ExprSubstitutionMap excludeAliasSMap = aliasSMap.clone();
+                    List<Expr> havingSlots = Lists.newArrayList();
+                    havingClause.collect(SlotRef.class, havingSlots);
+                    for (Expr expr : havingSlots) {
+                        if (excludeAliasSMap.get(expr) == null) {
+                            continue;
+                        }
+                        try {
+                            // try to use column name firstly
+                            expr.clone().analyze(analyzer);
+                            // analyze success means column name exist, do not use alias name
+                            excludeAliasSMap.removeByLhsExpr(expr);
+                        } catch (AnalysisException ex) {
+                            // according to case3, column name do not exist, keep alias name inside alias map
+                        }
                     }
+                    havingClauseAfterAnaylzed = havingClause.substitute(excludeAliasSMap, analyzer, false);
+                } else {
+                    // If user set force using alias, then having clauses prefer using alias rather than column name
+                    havingClauseAfterAnaylzed = havingClause.substitute(aliasSMap, analyzer, false);
                 }
-                havingClauseAfterAnaylzed = havingClause.substitute(excludeAliasSMap, analyzer, false);
             } else {
                 // according to mysql
                 // if there is no group by clause, the having clause should use alias
@@ -1268,7 +1277,11 @@ public class SelectStmt extends QueryStmt {
                 groupingInfo.buildRepeat(groupingExprs, groupByClause.getGroupingSetList());
             }
 
-            substituteOrdinalsAliases(groupingExprs, "GROUP BY", analyzer, false);
+            boolean aliasFirst = false;
+            if (analyzer.getContext() != null) {
+                aliasFirst = analyzer.getContext().getSessionVariable().isGroupByAndHavingUseAliasFirst();
+            }
+            substituteOrdinalsAliases(groupingExprs, "GROUP BY", analyzer, aliasFirst);
 
             if (!groupByClause.isGroupByExtension() && !groupingExprs.isEmpty()) {
                 ArrayList<Expr> tempExprs = new ArrayList<>(groupingExprs);
@@ -2241,7 +2254,11 @@ public class SelectStmt extends QueryStmt {
         }
         // substitute group by
         if (groupByClause != null) {
-            substituteOrdinalsAliases(groupByClause.getGroupingExprs(), "GROUP BY", analyzer, false);
+            boolean aliasFirst = false;
+            if (analyzer.getContext() != null) {
+                aliasFirst = analyzer.getContext().getSessionVariable().isGroupByAndHavingUseAliasFirst();
+            }
+            substituteOrdinalsAliases(groupByClause.getGroupingExprs(), "GROUP BY", analyzer, aliasFirst);
         }
         // substitute having
         if (havingClause != null) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index aae17c7ed7..a7cf5db874 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -260,6 +260,8 @@ public class SessionVariable implements Serializable, Writable {
     public static final String ENABLE_TWO_PHASE_READ_OPT = "enable_two_phase_read_opt";
     public static final String TWO_PHASE_READ_OPT_LIMIT_THRESHOLD = "two_phase_read_opt_limit_threshold";
 
+    public static final String GROUP_BY_AND_HAVING_USE_ALIAS_FIRST = "group_by_and_having_use_alias_first";
+
     // session origin value
     public Map<Field, String> sessionOriginValue = new HashMap<Field, String>();
     // check stmt is or not [select /*+ SET_VAR(...)*/ ...]
@@ -679,6 +681,11 @@ public class SessionVariable implements Serializable, Writable {
     @VariableMgr.VarAttr(name = TWO_PHASE_READ_OPT_LIMIT_THRESHOLD)
     public long twoPhaseReadLimitThreshold = 512;
 
+    // Default value is false, which means the group by and having clause
+    // should first use column name not alias. According to mysql.
+    @VariableMgr.VarAttr(name = GROUP_BY_AND_HAVING_USE_ALIAS_FIRST)
+    public boolean groupByAndHavingUseAliasFirst = false;
+
     // If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables,
     // not the default value set in the code.
     public void initFuzzyModeVariables() {
@@ -1238,6 +1245,10 @@ public class SessionVariable implements Serializable, Writable {
         return extractWideRangeExpr;
     }
 
+    public boolean isGroupByAndHavingUseAliasFirst() {
+        return groupByAndHavingUseAliasFirst;
+    }
+
     public int getCpuResourceLimit() {
         return cpuResourceLimit;
     }
diff --git a/regression-test/data/correctness_p0/test_group_having_alias.out b/regression-test/data/correctness_p0/test_group_having_alias.out
index 19c07ac008..cc442bedcc 100644
--- a/regression-test/data/correctness_p0/test_group_having_alias.out
+++ b/regression-test/data/correctness_p0/test_group_having_alias.out
@@ -10,6 +10,16 @@
 202245
 202245
 
+-- !sql --
+
+-- !sql --
+202245	3
+
+-- !sql --
+202245
+202245
+202245
+
 -- !case1 --
 2	2
 2	3
diff --git a/regression-test/suites/correctness_p0/test_group_having_alias.groovy b/regression-test/suites/correctness_p0/test_group_having_alias.groovy
index 52b768cd8a..fc5b09e8d1 100644
--- a/regression-test/suites/correctness_p0/test_group_having_alias.groovy
+++ b/regression-test/suites/correctness_p0/test_group_having_alias.groovy
@@ -71,6 +71,41 @@
         ORDER BY date;
     """
 
+    sql """set group_by_and_having_use_alias_first=true"""
+
+    qt_sql """
+        SELECT
+        date_format(date, '%x%v') AS `date`,
+        count(date) AS `diff_days`
+        FROM `tb_holiday`
+        WHERE `date` between 20221111 AND 20221116
+        GROUP BY date
+        HAVING date = 20221111
+        ORDER BY date;
+    """
+
+    qt_sql """
+        SELECT
+        date_format(date, '%x%v') AS `date2`,
+        count(date) AS `diff_days`
+        FROM `tb_holiday`
+        WHERE `date` between 20221111 AND 20221116
+        GROUP BY date2
+        HAVING date2 = 202245
+        ORDER BY date2;
+    """
+
+    qt_sql """
+        SELECT
+        date_format(date, '%x%v') AS `date`
+        FROM `tb_holiday`
+        WHERE `date` between 20221111 AND 20221116
+        HAVING date = 202245
+        ORDER BY date;
+    """
+
+    sql """set group_by_and_having_use_alias_first=false"""
+
     sql """
          CREATE TABLE `test_having_alias_tb` (
           `id` int(11) NULL,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org