You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by xu...@apache.org on 2023/06/06 04:08:53 UTC

[doris] branch master updated: [Improve](array-functions) support array first function (#20397)

This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1f032a551d [Improve](array-functions) support array first function (#20397)
1f032a551d is described below

commit 1f032a551d76ff2b0795d1bfe675248cc8bad823
Author: amory <wa...@selectdb.com>
AuthorDate: Tue Jun 6 12:08:46 2023 +0800

    [Improve](array-functions) support array first function (#20397)
    
    add array_first(lambda, [1,2,3,null]) function for doris
---
 .../sql-functions/array-functions/array_first.md   | 80 ++++++++++++++++++++++
 docs/sidebars.json                                 |  1 +
 .../sql-functions/array-functions/array_first.md   | 79 +++++++++++++++++++++
 .../doris/analysis/LambdaFunctionCallExpr.java     | 25 ++++++-
 .../array_functions/test_array_first.out           | 28 ++++++++
 .../array_functions/test_array_first.groovy        | 53 ++++++++++++++
 6 files changed, 264 insertions(+), 2 deletions(-)

diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_first.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_first.md
new file mode 100644
index 0000000000..0fddda5fe1
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_first.md
@@ -0,0 +1,80 @@
+---
+{
+    "title": "array_first",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## array_first
+
+<version since="2.0">
+
+array_first
+
+</version>
+
+### description
+Returns the first element in the array for which func(arr1[i]) returns something other than 0.
+
+#### Syntax
+
+```
+T array_first(lambda, ARRAY<T>)
+```
+
+Use a lambda bool expression and an array as the input parameters, the lambda expression is used to evaluate the internal data of other input ARRAY parameters.
+
+### notice
+
+`Only supported in vectorized engine`
+
+### example
+
+```
+mysql> select array_first(x->x>2, [1,2,3,0]) ;
++------------------------------------------------------------------------------------------------+
+| array_first(array_filter(ARRAY(1, 2, 3, 0), array_map([x] -> x(0) > 2, ARRAY(1, 2, 3, 0))), -1) |
++------------------------------------------------------------------------------------------------+
+|                                                                                              3 |
++------------------------------------------------------------------------------------------------+
+
+
+mysql> select array_first(x->x>4, [1,2,3,0]) ; 
++------------------------------------------------------------------------------------------------+
+| array_first(array_filter(ARRAY(1, 2, 3, 0), array_map([x] -> x(0) > 4, ARRAY(1, 2, 3, 0))), -1) |
++------------------------------------------------------------------------------------------------+
+|                                                                                           NULL |
++------------------------------------------------------------------------------------------------+
+
+
+mysql> select array_first(x->x>1, [1,2,3,0]) ;
++---------------------------------------------------------------------------------------------+
+| array_first(array_filter(ARRAY(1, 2, 3, 0), array_map([x] -> x > 1, ARRAY(1, 2, 3, 0))), 1) |
++---------------------------------------------------------------------------------------------+
+|                                                                                           2 |
++---------------------------------------------------------------------------------------------+
+```
+
+
+### keywords
+
+ARRAY, LAST, array_first
diff --git a/docs/sidebars.json b/docs/sidebars.json
index b1598a44d3..947670c78e 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -311,6 +311,7 @@
                                 "sql-manual/sql-functions/array-functions/array_exists",
                                 "sql-manual/sql-functions/array-functions/array_first_index",
                                 "sql-manual/sql-functions/array-functions/array_last_index",
+                                "sql-manual/sql-functions/array-functions/array_first",
                                 "sql-manual/sql-functions/array-functions/array_last",
                                 "sql-manual/sql-functions/array-functions/arrays_overlap",
                                 "sql-manual/sql-functions/array-functions/array_count",
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_first.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_first.md
new file mode 100644
index 0000000000..a80d99acbc
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_first.md
@@ -0,0 +1,79 @@
+---
+{
+    "title": "array_first",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## array_first
+
+<version since="2.0">
+
+array_first
+
+</version>
+
+### description
+返回数组中的第一个func(arr1[i])值不为0的元素。当数组中所有元素进行func(arr1[i])都为0时,结果返回`NULL`值。
+
+#### Syntax
+
+```
+T array_first(lambda, ARRAY<T>)
+```
+
+使用一个lambda表达式和一个ARRAY作为输入参数,lambda表达式为布尔型,用于对ARRAY中的每个元素进行判断返回值。
+
+### notice
+
+`仅支持向量化引擎中使用`
+
+### example
+
+```
+mysql> select array_first(x->x>2, [1,2,3,0]) ;
++------------------------------------------------------------------------------------------------+
+| array_first(array_filter(ARRAY(1, 2, 3, 0), array_map([x] -> x(0) > 2, ARRAY(1, 2, 3, 0))), -1) |
++------------------------------------------------------------------------------------------------+
+|                                                                                              3 |
++------------------------------------------------------------------------------------------------+
+
+
+mysql> select array_first(x->x>4, [1,2,3,0]) ; 
++------------------------------------------------------------------------------------------------+
+| array_first(array_filter(ARRAY(1, 2, 3, 0), array_map([x] -> x(0) > 4, ARRAY(1, 2, 3, 0))), -1) |
++------------------------------------------------------------------------------------------------+
+|                                                                                           NULL |
++------------------------------------------------------------------------------------------------+
+
+
+mysql> select array_first(x->x>1, [1,2,3,0]) ;
++---------------------------------------------------------------------------------------------+
+| array_first(array_filter(ARRAY(1, 2, 3, 0), array_map([x] -> x > 1, ARRAY(1, 2, 3, 0))), 1) |
++---------------------------------------------------------------------------------------------+
+|                                                                                           2 |
++---------------------------------------------------------------------------------------------+
+```
+
+### keywords
+
+ARRAY, LAST, array_first
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java
index 9fb9122b3e..33a66570e5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java
@@ -37,13 +37,14 @@ import java.util.List;
 public class LambdaFunctionCallExpr extends FunctionCallExpr {
     public static final ImmutableSet<String> LAMBDA_FUNCTION_SET = new ImmutableSortedSet.Builder(
             String.CASE_INSENSITIVE_ORDER).add("array_map").add("array_filter").add("array_exists").add("array_sortby")
-            .add("array_first_index").add("array_last_index").add("array_last").add("array_count").build();
+            .add("array_first_index").add("array_last_index").add("array_first").add("array_last").add("array_count")
+            .build();
     // The functions in this set are all normal array functions when implemented initially.
     // and then wants add lambda expr as the input param, so we rewrite it to contains an array_map lambda function
     // rather than reimplementing a lambda function, this will be reused the implementation of normal array function
     public static final ImmutableSet<String> LAMBDA_MAPPED_FUNCTION_SET = new ImmutableSortedSet.Builder(
             String.CASE_INSENSITIVE_ORDER).add("array_exists").add("array_sortby")
-            .add("array_first_index").add("array_last_index").add("array_last").add("array_count")
+            .add("array_first_index").add("array_last_index").add("array_first").add("array_last").add("array_count")
             .build();
 
     private static final Logger LOG = LogManager.getLogger(LambdaFunctionCallExpr.class);
@@ -215,6 +216,26 @@ public class LambdaFunctionCallExpr extends FunctionCallExpr {
             }
             fnName = new FunctionName(null, "element_at");
             fn = getBuiltinFunction(fnName.getFunction(), argTypes, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
+        } else if (fnName.getFunction().equalsIgnoreCase("array_first")) {
+            // array_last(lambda,array)--->array_first(array,lambda)--->element_at(array_filter,1)
+            if (getChild(childSize - 1) instanceof LambdaFunctionExpr) {
+                List<Expr> params = new ArrayList<>();
+                for (int i = 0; i <= childSize - 1; ++i) {
+                    params.add(getChild(i));
+                }
+                LambdaFunctionCallExpr arrayFilterFunc = new LambdaFunctionCallExpr("array_filter", params);
+                arrayFilterFunc.analyzeImpl(analyzer);
+                IntLiteral indexParam = new IntLiteral(1, Type.INT);
+
+                argTypes = new Type[2];
+                argTypes[0] = getChild(0).getType();
+                argTypes[1] = indexParam.getType();
+                this.children.clear();
+                this.children.add(arrayFilterFunc);
+                this.children.add(indexParam);
+            }
+            fnName = new FunctionName(null, "element_at");
+            fn = getBuiltinFunction(fnName.getFunction(), argTypes, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
         }
         if (fn == null) {
             LOG.warn("fn {} not exists", this.toSqlImpl());
diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_first.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_first.out
new file mode 100644
index 0000000000..bc4f80a957
--- /dev/null
+++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_first.out
@@ -0,0 +1,28 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_00 --
+4
+
+-- !select_01 --
+\N
+
+-- !select_03 --
+5
+
+-- !select_04 --
+b
+
+-- !select_05 --
+10.2
+
+-- !select_06 --
+0	[2]	["123", "124", "125"]
+1	[1, 2, 3, 4, 5]	["234", "124", "125"]
+2	[1, 2, 10, 12, 10]	["345", "234", "123"]
+3	[1, 3, 4, 2]	["222", "444", "555"]
+
+-- !select_07 --
+\N	125
+4	234
+10	345
+4	222
+
diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_first.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_first.groovy
new file mode 100644
index 0000000000..2b4fc07860
--- /dev/null
+++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_first.groovy
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_first") {
+
+    def tableName = "test_array_first"
+        sql "DROP TABLE IF EXISTS ${tableName}"
+        sql """
+            CREATE TABLE IF NOT EXISTS `${tableName}` (
+                `id` int(11) NULL,
+                `c_array1` array<int(11)> NULL, 
+                `c_array2` array<varchar(20)> NULL
+            ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "storage_format" = "V2"
+        )
+        """
+
+
+        sql """INSERT INTO ${tableName} values
+            (0, [2], ['123', '124', '125']),
+            (1, [1,2,3,4,5], ['234', '124', '125']),
+            (2, [1,2,10,12,10], ['345', '234', '123']), 
+            (3, [1,3,4,2], ['222', '444', '555'])
+        """
+        qt_select_00 " select array_first(x -> x>3, [1,2,3,4,5]);"
+        qt_select_01 " select array_first(x -> x<1, [1,2,3,4,5]);"
+        qt_select_03 " select array_first(x -> x>=5,[1,2,3,4,5]);"
+        qt_select_04 " select array_first(x -> x > 'abc', ['a','b','c']);"
+        qt_select_05 " select array_first(x -> x > 5.2 , [10.2, 5.3, 4]);"
+
+        qt_select_06  "select * from ${tableName} order by id;"
+        
+        qt_select_07 " select array_first(x->x>3,c_array1), array_first(x-> x>'124',c_array2) from test_array_first order by id;"        
+        sql "DROP TABLE IF EXISTS ${tableName}"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org