You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/29 01:11:40 UTC
[doris] branch master updated: [Vectorized] Support both distinct and order by of group_concat (#11278)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new bac280e803 [Vectorized] Support both distinct and order by of group_concat (#11278)
bac280e803 is described below
commit bac280e80309ee1edd7ea6d2317113794bd62555
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Fri Jul 29 09:11:34 2022 +0800
[Vectorized] Support both distinct and order by of group_concat (#11278)
Co-authored-by: lihaopeng <li...@baidu.com>
---
.../aggregate-functions/group_concat.md | 19 +++++++++++++++++--
.../aggregate-functions/group_concat.md | 17 +++++++++++++++--
.../org/apache/doris/analysis/AggregateInfo.java | 20 +++++++++++++++++---
.../org/apache/doris/analysis/FunctionCallExpr.java | 9 +++++----
.../data/query/group_concat/test_group_concat.out | 20 ++++++++++++++++++++
.../query/group_concat/test_group_concat.groovy | 7 +++++++
6 files changed, 81 insertions(+), 11 deletions(-)
diff --git a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
index 7e0095cafd..41820022a2 100644
--- a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
@@ -28,10 +28,11 @@ under the License.
### description
#### Syntax
-`VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR str[, VARCHAR sep])`
+`VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR str[, VARCHAR sep]) [ORDER BY { col_name | expr} [ASC | DESC])`
-This function is an aggregation function similar to sum (), and group_concat links multiple rows of results in the result set to a string. The second parameter, sep, is a connection symbol between strings, which can be omitted. This function usually needs to be used with group by statements.
+This function is an aggregation function similar to sum (), and group_concat links multiple rows of results in the result set to a string. The second parameter, sep, is a connection symbol between strings, which can be omitted.
+Support Order By for sorting multi-row results, sorting and aggregation columns can be different. This function usually needs to be used with group by statements.
### example
@@ -73,6 +74,20 @@ mysql> select GROUP_CONCAT(value, NULL) from test;
+----------------------------+
| NULL |
+----------------------------+
+
+SELECT abs(k3), group_concat(distinct cast(abs(k2) as varchar) order by abs(k1), k5) FROM bigtable group by abs(k3) order by abs(k3); +------------+-------------------------------------------------------------------------------+
+| abs(`k3`) | group_concat(DISTINCT CAST(abs(`k2`) AS CHARACTER), ORDER BY abs(`k1`), `k5`) |
++------------+-------------------------------------------------------------------------------+
+| 103 | 255 |
+| 1001 | 1989, 1986 |
+| 1002 | 1989, 32767 |
+| 3021 | 1991, 32767, 1992 |
+| 5014 | 1985, 1991 |
+| 25699 | 1989 |
+| 2147483647 | 255, 1991, 32767, 32767 |
++------------+-------------------------------------------------------------------------------+
+```
+
```
### keywords
GROUP_CONCAT,GROUP,CONCAT
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
index ef6189483f..c82f8029fe 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/group_concat.md
@@ -28,10 +28,10 @@ under the License.
### description
#### Syntax
-`VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR str[, VARCHAR sep])`
+`VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR str[, VARCHAR sep] [ORDER BY { col_name | expr} [ASC | DESC])`
-该函数是类似于 sum() 的聚合函数,group_concat 将结果集中的多行结果连接成一个字符串。第二个参数 sep 为字符串之间的连接符号,该参数可以省略。该函数通常需要和 group by 语句一起使用。
+该函数是类似于 sum() 的聚合函数,group_concat 将结果集中的多行结果连接成一个字符串。第二个参数 sep 为字符串之间的连接符号,该参数可以省略,并支持Order By进行多行结果的排序,排序和聚合列可不同。该函数通常需要和 group by 语句一起使用。
### example
@@ -73,6 +73,19 @@ mysql> select GROUP_CONCAT(value, NULL) from test;
+----------------------------+
| NULL |
+----------------------------+
+
+SELECT abs(k3), group_concat(distinct cast(abs(k2) as varchar) order by abs(k1), k5) FROM bigtable group by abs(k3) order by abs(k3); +------------+-------------------------------------------------------------------------------+
+| abs(`k3`) | group_concat(DISTINCT CAST(abs(`k2`) AS CHARACTER), ORDER BY abs(`k1`), `k5`) |
++------------+-------------------------------------------------------------------------------+
+| 103 | 255 |
+| 1001 | 1989, 1986 |
+| 1002 | 1989, 32767 |
+| 3021 | 1991, 32767, 1992 |
+| 5014 | 1985, 1991 |
+| 25699 | 1989 |
+| 2147483647 | 255, 1991, 32767, 32767 |
++------------+-------------------------------------------------------------------------------+
```
+
### keywords
GROUP_CONCAT,GROUP,CONCAT
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
index 79e01e0cae..93a2cfcaa3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
@@ -327,6 +327,13 @@ public final class AggregateInfo extends AggregateInfoBase {
// TODO: Deal with constant exprs more generally, instead of special-casing
// group_concat().
expr0Children.add(distinctAggExprs.get(0).getChild(0).ignoreImplicitCast());
+ FunctionCallExpr distinctExpr = distinctAggExprs.get(0);
+ if (!distinctExpr.getOrderByElements().isEmpty()) {
+ for (int i = distinctExpr.getChildren().size() - distinctExpr.getOrderByElements().size();
+ i < distinctExpr.getChildren().size(); i++) {
+ expr0Children.add(distinctAggExprs.get(0).getChild(i));
+ }
+ }
} else {
for (Expr expr : distinctAggExprs.get(0).getChildren()) {
expr0Children.add(expr.ignoreImplicitCast());
@@ -592,10 +599,17 @@ public final class AggregateInfo extends AggregateInfoBase {
// tuple reference is correct.
exprList.add(new SlotRef(inputDesc.getSlots().get(origGroupingExprs.size())));
// Check if user provided a custom separator
- if (inputExpr.getChildren().size() == 2) {
+ if (inputExpr.getChildren().size() - inputExpr.getOrderByElements().size() == 2) {
exprList.add(inputExpr.getChild(1));
}
- aggExpr = new FunctionCallExpr(inputExpr.getFnName(), exprList);
+
+ if (!inputExpr.getOrderByElements().isEmpty()) {
+ for (int i = 0; i < inputExpr.getOrderByElements().size(); i++) {
+ inputExpr.getOrderByElements().get(i).setExpr(
+ new SlotRef(inputDesc.getSlots().get(origGroupingExprs.size() + i + 1)));
+ }
+ }
+ aggExpr = new FunctionCallExpr(inputExpr.getFnName(), exprList, inputExpr.getOrderByElements());
} else {
// SUM(DISTINCT <expr>) -> SUM(<last grouping slot>);
// (MIN(DISTINCT ...) and MAX(DISTINCT ...) have their DISTINCT turned
@@ -658,7 +672,7 @@ public final class AggregateInfo extends AggregateInfoBase {
// If we are counting distinct params of group_concat, we cannot include the custom
// separator since it is not a distinct param.
if (distinctAggExprs.get(0).getFnName().getFunction().equalsIgnoreCase("group_concat")) {
- numDistinctParams = 1;
+ numDistinctParams = 1 + distinctAggExprs.get(0).getOrderByElements().size();
}
int numOrigGroupingExprs = inputAggInfo.getGroupingExprs().size() - numDistinctParams;
Preconditions.checkState(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index 6038d5e3f1..0ad927f0cd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -148,6 +148,11 @@ public class FunctionCallExpr extends Expr {
this(fnName, new FunctionParams(false, params));
}
+ public FunctionCallExpr(FunctionName fnName, List<Expr> params, List<OrderByElement> orderByElements)
+ throws AnalysisException {
+ this(fnName, new FunctionParams(false, params), orderByElements);
+ }
+
public FunctionCallExpr(String fnName, FunctionParams params) {
this(new FunctionName(fnName), params, false);
}
@@ -168,10 +173,6 @@ public class FunctionCallExpr extends Expr {
fnName.getFunction().toLowerCase())) {
throw new AnalysisException(
"ORDER BY not support for the function:" + fnName.getFunction().toLowerCase());
- } else if (params.isDistinct()) {
- throw new AnalysisException(
- "ORDER BY not support for the distinct, support in the furture:"
- + fnName.getFunction().toLowerCase());
}
}
setChildren();
diff --git a/regression-test/data/query/group_concat/test_group_concat.out b/regression-test/data/query/group_concat/test_group_concat.out
index e61bd4fd4f..8a742347a8 100644
--- a/regression-test/data/query/group_concat/test_group_concat.out
+++ b/regression-test/data/query/group_concat/test_group_concat.out
@@ -25,3 +25,23 @@ false
25699 1989
2147483647 255:1991:32767:32767
+-- !select --
+\N \N
+103 255
+1001 1989, 1986
+1002 1989, 32767
+3021 1991, 32767, 1992
+5014 1985, 1991
+25699 1989
+2147483647 255, 1991, 32767, 32767
+
+-- !select --
+\N \N
+103 255
+1001 1989:1986
+1002 1989:32767
+3021 1991:32767:1992
+5014 1985:1991
+25699 1989
+2147483647 255:1991:32767:32767
+
diff --git a/regression-test/suites/query/group_concat/test_group_concat.groovy b/regression-test/suites/query/group_concat/test_group_concat.groovy
index 12d420cbe0..fcb8a3fec1 100644
--- a/regression-test/suites/query/group_concat/test_group_concat.groovy
+++ b/regression-test/suites/query/group_concat/test_group_concat.groovy
@@ -31,4 +31,11 @@ suite("test_group_concat", "query") {
qt_select """
SELECT abs(k3), group_concat(cast(abs(k2) as varchar), ":" order by abs(k2), k1) FROM test_query_db.baseall group by abs(k3) order by abs(k3)
"""
+ qt_select """
+ SELECT abs(k3), group_concat(distinct cast(abs(k2) as char) order by abs(k1), k2) FROM test_query_db.baseall group by abs(k3) order by abs(k3);
+ """
+ qt_select """
+ SELECT abs(k3), group_concat(distinct cast(abs(k2) as char), ":" order by abs(k1), k2) FROM test_query_db.baseall group by abs(k3) order by abs(k3);
+ """
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org