You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/10 14:58:18 UTC

[doris] 02/02: [fix](having) revert 15143 and fix having clause with multi-conditions (#15745)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b7d2431ac3e0ac377bc081306fc2f176b1c73a25
Author: camby <10...@qq.com>
AuthorDate: Tue Jan 10 15:57:29 2023 +0800

    [fix](having) revert 15143 and fix having clause with multi-conditions (#15745)
    
    Describe your changes.
    
    Firstly having clause of Mysql is really very complex, we are hard to follow all rules, so we revert pr15143 to keep the logic the same as before.
    
    Secondly the origin implementation has problem while having clause has multi-conditions.
    For example:
    
    case1: here v2 inside having clause use table column test_having_alias_tb.v2
    SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v2>1);
    ERROR 1105 (HY000): errCode = 2, detailMessage = HAVING clause not produced by aggregation output (missing from GROUP BY clause?): (`v2` > 1)
    case2: here v2 inside having clause use alias name v2 =sum(test_having_alias_tb.v2), another condition make logic of v2 differently.
    SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND v2>1) ORDER BY id,v;
    +------+------+------+
    | id   | v    | v2   |
    +------+------+------+
    |    2 |    1 |    3 |
    +------+------+------+
    So here we try to make the having clause rules simple:
    Rule1: if alias name inside having clause is the same as column name, we use column name not alias name;
    Rule2: if alias name inside having clause do not have same name as column name, we use alias name;
    
    Co-authored-by: cambyzju <zh...@baidu.com>
---
 .../java/org/apache/doris/analysis/SelectStmt.java | 38 ++++++++++++----------
 .../correctness_p0/test_group_having_alias.out     |  6 ++++
 .../correctness_p0/test_group_having_alias.groovy  |  6 ++--
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index eac2cd4c26..fbb89c0fca 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -996,35 +996,37 @@ public class SelectStmt extends QueryStmt {
              * TODO: the a.key should be replaced by a.k1 instead of unknown column 'key' in 'a'
              */
 
-            // according to mysql
-            // having clause should use column name inside group by clause, prior to alias.
-            // case1: having clause use column name table.v1, because v1 inside group by clause
-            //     select id, sum(v1) v1 from table group by id,v1 having(v1>1);
-            // case2: having clause use alias name v2, because v2 is not inside group by clause
-            //     select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND v2>1);
-            // case3: having clause use alias name v, because table do not have column name v
-            //     select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND v2>1);
+            /* according to mysql (https://dev.mysql.com/doc/refman/8.0/en/select.html)
+             * "For GROUP BY or HAVING clauses, it searches the FROM clause before searching in the
+             * select_expr values. (For GROUP BY and HAVING, this differs from the pre-MySQL 5.0 behavior
+             * that used the same rules as for ORDER BY.)"
+             * case1: having clause use column name table.v1, because it searches the FROM clause firstly
+             *     select id, sum(v1) v1 from table group by id,v1 having(v1>1);
+             * case2: having clause used in aggregate functions, such as sum(v2) here
+             *     select id, sum(v1) v1, sum(v2) v2 from table group by id,v1 having(v1>1 AND sum(v2)>1);
+             * case3: having clause use alias name v, because table do not have column name v
+             *     select id, floor(v1) v, sum(v2) v2 from table group by id,v having(v>1 AND v2>1);
+             * case4: having clause use alias name vsum, because table do not have column name vsum
+             *     select id, floor(v1) v, sum(v2) vsum from table group by id,v having(v>1 AND vsum>1);
+             */
             if (groupByClause != null) {
-                ExprSubstitutionMap excludeGroupByaliasSMap = aliasSMap.clone();
-                // according to case2, maybe some having slots inside group by clause, some do not
-                List<Expr> groupBySlots = Lists.newArrayList();
-                for (Expr expr : groupByClause.getGroupingExprs()) {
-                    expr.collect(SlotRef.class, groupBySlots);
-                }
-                for (Expr expr : groupBySlots) {
-                    if (excludeGroupByaliasSMap.get(expr) == null) {
+                ExprSubstitutionMap excludeAliasSMap = aliasSMap.clone();
+                List<Expr> havingSlots = Lists.newArrayList();
+                havingClause.collect(SlotRef.class, havingSlots);
+                for (Expr expr : havingSlots) {
+                    if (excludeAliasSMap.get(expr) == null) {
                         continue;
                     }
                     try {
                         // try to use column name firstly
                         expr.clone().analyze(analyzer);
                         // analyze success means column name exist, do not use alias name
-                        excludeGroupByaliasSMap.removeByLhsExpr(expr);
+                        excludeAliasSMap.removeByLhsExpr(expr);
                     } catch (AnalysisException ex) {
                         // according to case3, column name do not exist, keep alias name inside alias map
                     }
                 }
-                havingClauseAfterAnaylzed = havingClause.substitute(excludeGroupByaliasSMap, analyzer, false);
+                havingClauseAfterAnaylzed = havingClause.substitute(excludeAliasSMap, analyzer, false);
             } else {
                 // according to mysql
                 // if there is no group by clause, the having clause should use alias
diff --git a/regression-test/data/correctness_p0/test_group_having_alias.out b/regression-test/data/correctness_p0/test_group_having_alias.out
index 8d3629b5f5..7a87076e28 100644
--- a/regression-test/data/correctness_p0/test_group_having_alias.out
+++ b/regression-test/data/correctness_p0/test_group_having_alias.out
@@ -20,3 +20,9 @@
 -- !case3 --
 2	1	3
 
+-- !case4 --
+2	1	3
+
+-- !case5 --
+2	3
+
diff --git a/regression-test/suites/correctness_p0/test_group_having_alias.groovy b/regression-test/suites/correctness_p0/test_group_having_alias.groovy
index 45350026e2..80187b9113 100644
--- a/regression-test/suites/correctness_p0/test_group_having_alias.groovy
+++ b/regression-test/suites/correctness_p0/test_group_having_alias.groovy
@@ -94,7 +94,9 @@
     """
     sql """ INSERT INTO test_having_alias_tb values(1,1,1),(2,2,2),(2,3,3); """
     qt_case1 """ SELECT id, sum(v1) v1 FROM test_having_alias_tb GROUP BY id,v1 having(v1>1) ORDER BY id,v1; """
-    qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND v2>1) ORDER BY id,v1; """
-    qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND v2>1) ORDER BY id,v; """
+    qt_case2 """ SELECT id, sum(v1) v1, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v1 having(v1!=2 AND sum(v2)>1) ORDER BY id,v1; """
+    qt_case3 """ SELECT id, v1-2 as v, sum(v2) v2 FROM test_having_alias_tb GROUP BY id,v having(v>0 AND sum(v2)>1) ORDER BY id,v; """
+    qt_case4 """ SELECT id, v1-2 as v, sum(v2) vsum FROM test_having_alias_tb GROUP BY id,v having(v>0 AND vsum>1) ORDER BY id,v; """
+    qt_case5 """ SELECT id, max(v1) v1 FROM test_having_alias_tb GROUP BY 1 having count(distinct v1)>1 ORDER BY id; """
     sql """ DROP TABLE IF EXISTS `test_having_alias_tb`; """
  } 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org