You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/29 01:11:40 UTC

[doris] branch master updated: [Vectorized] Support both distinct and order by of group_concat (#11278)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new bac280e803 [Vectorized] Support both distinct and order by of group_concat (#11278)
bac280e803 is described below

commit bac280e80309ee1edd7ea6d2317113794bd62555
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Fri Jul 29 09:11:34 2022 +0800

    [Vectorized] Support both distinct and order by of group_concat (#11278)
    
    Co-authored-by: lihaopeng <li...@baidu.com>
---
 .../aggregate-functions/group_concat.md              | 19 +++++++++++++++++--
 .../aggregate-functions/group_concat.md              | 17 +++++++++++++++--
 .../org/apache/doris/analysis/AggregateInfo.java     | 20 +++++++++++++++++---
 .../org/apache/doris/analysis/FunctionCallExpr.java  |  9 +++++----
 .../data/query/group_concat/test_group_concat.out    | 20 ++++++++++++++++++++
 .../query/group_concat/test_group_concat.groovy      |  7 +++++++
 6 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
index 7e0095cafd..41820022a2 100644
--- a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
@@ -28,10 +28,11 @@ under the License.
 ### description
 #### Syntax
 
-`VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR str[, VARCHAR sep])`
+`VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR str[, VARCHAR sep]) [ORDER BY { col_name | expr} [ASC | DESC])`
 
 
-This function is an aggregation function similar to sum (), and group_concat links multiple rows of results in the result set to a string. The second parameter, sep, is a connection symbol between strings, which can be omitted. This function usually needs to be used with group by statements.
+This function is an aggregation function similar to sum (), and group_concat links multiple rows of results in the result set to a string. The second parameter, sep, is a connection symbol between strings, which can be omitted. 
+Support Order By for sorting multi-row results, sorting and aggregation columns can be different. This function usually needs to be used with group by statements.
 
 ### example
 
@@ -73,6 +74,20 @@ mysql> select GROUP_CONCAT(value, NULL) from test;
 +----------------------------+
 | NULL                       |
 +----------------------------+
+
+SELECT abs(k3), group_concat(distinct cast(abs(k2) as varchar) order by abs(k1), k5) FROM bigtable group by abs(k3) order by abs(k3);     +------------+-------------------------------------------------------------------------------+
+| abs(`k3`)  | group_concat(DISTINCT CAST(abs(`k2`) AS CHARACTER), ORDER BY abs(`k1`), `k5`) |
++------------+-------------------------------------------------------------------------------+
+|        103 | 255                                                                           |
+|       1001 | 1989, 1986                                                                    |
+|       1002 | 1989, 32767                                                                   |
+|       3021 | 1991, 32767, 1992                                                             |
+|       5014 | 1985, 1991                                                                    |
+|      25699 | 1989                                                                          |
+| 2147483647 | 255, 1991, 32767, 32767                                                       |
++------------+-------------------------------------------------------------------------------+
+```
+
 ```
 ### keywords
 GROUP_CONCAT,GROUP,CONCAT
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
index ef6189483f..c82f8029fe 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
@@ -28,10 +28,10 @@ under the License.
 ### description
 #### Syntax
 
-`VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR str[, VARCHAR sep])`
+`VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR str[, VARCHAR sep] [ORDER BY { col_name | expr} [ASC | DESC])`
 
 
-该函数是类似于 sum() 的聚合函数,group_concat 将结果集中的多行结果连接成一个字符串。第二个参数 sep 为字符串之间的连接符号,该参数可以省略。该函数通常需要和 group by 语句一起使用。
+该函数是类似于 sum() 的聚合函数,group_concat 将结果集中的多行结果连接成一个字符串。第二个参数 sep 为字符串之间的连接符号,该参数可以省略,并支持Order By进行多行结果的排序,排序和聚合列可不同。该函数通常需要和 group by 语句一起使用。
 
 ### example
 
@@ -73,6 +73,19 @@ mysql> select GROUP_CONCAT(value, NULL) from test;
 +----------------------------+
 | NULL                       |
 +----------------------------+
+
+SELECT abs(k3), group_concat(distinct cast(abs(k2) as varchar) order by abs(k1), k5) FROM bigtable group by abs(k3) order by abs(k3);     +------------+-------------------------------------------------------------------------------+
+| abs(`k3`)  | group_concat(DISTINCT CAST(abs(`k2`) AS CHARACTER), ORDER BY abs(`k1`), `k5`) |
++------------+-------------------------------------------------------------------------------+
+|        103 | 255                                                                           |
+|       1001 | 1989, 1986                                                                    |
+|       1002 | 1989, 32767                                                                   |
+|       3021 | 1991, 32767, 1992                                                             |
+|       5014 | 1985, 1991                                                                    |
+|      25699 | 1989                                                                          |
+| 2147483647 | 255, 1991, 32767, 32767                                                       |
++------------+-------------------------------------------------------------------------------+
 ```
+
 ### keywords
 GROUP_CONCAT,GROUP,CONCAT
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
index 79e01e0cae..93a2cfcaa3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
@@ -327,6 +327,13 @@ public final class AggregateInfo extends AggregateInfoBase {
             // TODO: Deal with constant exprs more generally, instead of special-casing
             // group_concat().
             expr0Children.add(distinctAggExprs.get(0).getChild(0).ignoreImplicitCast());
+            FunctionCallExpr distinctExpr = distinctAggExprs.get(0);
+            if (!distinctExpr.getOrderByElements().isEmpty()) {
+                for (int i = distinctExpr.getChildren().size() - distinctExpr.getOrderByElements().size();
+                        i < distinctExpr.getChildren().size(); i++) {
+                    expr0Children.add(distinctAggExprs.get(0).getChild(i));
+                }
+            }
         } else {
             for (Expr expr : distinctAggExprs.get(0).getChildren()) {
                 expr0Children.add(expr.ignoreImplicitCast());
@@ -592,10 +599,17 @@ public final class AggregateInfo extends AggregateInfoBase {
                     // tuple reference is correct.
                     exprList.add(new SlotRef(inputDesc.getSlots().get(origGroupingExprs.size())));
                     // Check if user provided a custom separator
-                    if (inputExpr.getChildren().size() == 2) {
+                    if (inputExpr.getChildren().size() - inputExpr.getOrderByElements().size() == 2) {
                         exprList.add(inputExpr.getChild(1));
                     }
-                    aggExpr = new FunctionCallExpr(inputExpr.getFnName(), exprList);
+
+                    if (!inputExpr.getOrderByElements().isEmpty()) {
+                        for (int i = 0; i < inputExpr.getOrderByElements().size(); i++) {
+                            inputExpr.getOrderByElements().get(i).setExpr(
+                                new SlotRef(inputDesc.getSlots().get(origGroupingExprs.size() + i + 1)));
+                        }
+                    }
+                    aggExpr = new FunctionCallExpr(inputExpr.getFnName(), exprList, inputExpr.getOrderByElements());
                 } else {
                     // SUM(DISTINCT <expr>) -> SUM(<last grouping slot>);
                     // (MIN(DISTINCT ...) and MAX(DISTINCT ...) have their DISTINCT turned
@@ -658,7 +672,7 @@ public final class AggregateInfo extends AggregateInfoBase {
         // If we are counting distinct params of group_concat, we cannot include the custom
         // separator since it is not a distinct param.
         if (distinctAggExprs.get(0).getFnName().getFunction().equalsIgnoreCase("group_concat")) {
-            numDistinctParams = 1;
+            numDistinctParams = 1 + distinctAggExprs.get(0).getOrderByElements().size();
         }
         int numOrigGroupingExprs = inputAggInfo.getGroupingExprs().size() - numDistinctParams;
         Preconditions.checkState(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index 6038d5e3f1..0ad927f0cd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -148,6 +148,11 @@ public class FunctionCallExpr extends Expr {
         this(fnName, new FunctionParams(false, params));
     }
 
+    public FunctionCallExpr(FunctionName fnName, List<Expr> params, List<OrderByElement> orderByElements)
+            throws AnalysisException {
+        this(fnName, new FunctionParams(false, params), orderByElements);
+    }
+
     public FunctionCallExpr(String fnName, FunctionParams params) {
         this(new FunctionName(fnName), params, false);
     }
@@ -168,10 +173,6 @@ public class FunctionCallExpr extends Expr {
                     fnName.getFunction().toLowerCase())) {
                 throw new AnalysisException(
                     "ORDER BY not support for the function:" + fnName.getFunction().toLowerCase());
-            } else if (params.isDistinct()) {
-                throw new AnalysisException(
-                    "ORDER BY not support for the distinct, support in the furture:"
-                        + fnName.getFunction().toLowerCase());
             }
         }
         setChildren();
diff --git a/regression-test/data/query/group_concat/test_group_concat.out b/regression-test/data/query/group_concat/test_group_concat.out
index e61bd4fd4f..8a742347a8 100644
--- a/regression-test/data/query/group_concat/test_group_concat.out
+++ b/regression-test/data/query/group_concat/test_group_concat.out
@@ -25,3 +25,23 @@ false
 25699	1989
 2147483647	255:1991:32767:32767
 
+-- !select --
+\N	\N
+103	255
+1001	1989, 1986
+1002	1989, 32767
+3021	1991, 32767, 1992
+5014	1985, 1991
+25699	1989
+2147483647	255, 1991, 32767, 32767
+
+-- !select --
+\N	\N
+103	255
+1001	1989:1986
+1002	1989:32767
+3021	1991:32767:1992
+5014	1985:1991
+25699	1989
+2147483647	255:1991:32767:32767
+
diff --git a/regression-test/suites/query/group_concat/test_group_concat.groovy b/regression-test/suites/query/group_concat/test_group_concat.groovy
index 12d420cbe0..fcb8a3fec1 100644
--- a/regression-test/suites/query/group_concat/test_group_concat.groovy
+++ b/regression-test/suites/query/group_concat/test_group_concat.groovy
@@ -31,4 +31,11 @@ suite("test_group_concat", "query") {
     qt_select """
                 SELECT abs(k3), group_concat(cast(abs(k2) as varchar), ":" order by abs(k2), k1) FROM test_query_db.baseall group by abs(k3) order by abs(k3)
               """
+    qt_select """
+                SELECT abs(k3), group_concat(distinct cast(abs(k2) as char) order by abs(k1), k2) FROM test_query_db.baseall group by abs(k3) order by abs(k3);
+              """
+    qt_select """
+                SELECT abs(k3), group_concat(distinct cast(abs(k2) as char), ":" order by abs(k1), k2) FROM test_query_db.baseall group by abs(k3) order by abs(k3);
+              """
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org