You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/12/20 21:11:37 UTC

(arrow-datafusion) branch main updated: feat: support `LargeList` in `array_dims` (#8592)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 98a5a4eb1e feat: support `LargeList` in `array_dims` (#8592)
98a5a4eb1e is described below

commit 98a5a4eb1ea1277f5fe001e1c7602b37592452f1
Author: Alex Huang <hu...@gmail.com>
AuthorDate: Wed Dec 20 22:11:30 2023 +0100

    feat: support `LargeList` in `array_dims` (#8592)
    
    * support LargeList in array_dims
    
    * drop table
    
    * add argument check
---
 datafusion/physical-expr/src/array_expressions.rs | 31 +++++++++++++---
 datafusion/sqllogictest/test_files/array.slt      | 43 +++++++++++++++++++++--
 2 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index 0a76319188..bdab65cab9 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -1925,12 +1925,33 @@ pub fn array_length(args: &[ArrayRef]) -> Result<ArrayRef> {
 
 /// Array_dims SQL function
 pub fn array_dims(args: &[ArrayRef]) -> Result<ArrayRef> {
-    let list_array = as_list_array(&args[0])?;
+    if args.len() != 1 {
+        return exec_err!("array_dims needs one argument");
+    }
+
+    let data = match args[0].data_type() {
+        DataType::List(_) => {
+            let array = as_list_array(&args[0])?;
+            array
+                .iter()
+                .map(compute_array_dims)
+                .collect::<Result<Vec<_>>>()?
+        }
+        DataType::LargeList(_) => {
+            let array = as_large_list_array(&args[0])?;
+            array
+                .iter()
+                .map(compute_array_dims)
+                .collect::<Result<Vec<_>>>()?
+        }
+        _ => {
+            return exec_err!(
+                "array_dims does not support type '{:?}'",
+                args[0].data_type()
+            );
+        }
+    };
 
-    let data = list_array
-        .iter()
-        .map(compute_array_dims)
-        .collect::<Result<Vec<_>>>()?;
     let result = ListArray::from_iter_primitive::<UInt64Type, _, _>(data);
 
     Ok(Arc::new(result) as ArrayRef)
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index b38f73ecb8..ca33f08de0 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -67,6 +67,16 @@ AS VALUES
   (make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 18.8), NULL)
 ;
 
+statement ok
+CREATE TABLE large_arrays
+AS
+  SELECT
+    arrow_cast(column1, 'LargeList(List(Int64))') AS column1,
+    arrow_cast(column2, 'LargeList(Float64)') AS column2,
+    arrow_cast(column3, 'LargeList(Utf8)') AS column3
+  FROM arrays
+;
+
 statement ok
 CREATE TABLE slices
 AS VALUES
@@ -2820,8 +2830,7 @@ NULL 10
 ## array_dims (aliases: `list_dims`)
 
 # array dims error
-# TODO this is a separate bug
-query error Internal error: could not cast value to arrow_array::array::list_array::GenericListArray<i32>\.
+query error Execution error: array_dims does not support type 'Int64'
 select array_dims(1);
 
 # array_dims scalar function
@@ -2830,6 +2839,11 @@ select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])),
 ----
 [3] [2, 2] [1, 1, 1, 2, 1]
 
+query ???
+select array_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))'));
+----
+[3] [2, 2] [1, 1, 1, 2, 1]
+
 # array_dims scalar function #2
 query ??
 select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)), array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2));
@@ -2842,12 +2856,22 @@ select array_dims(make_array()), array_dims(make_array(make_array()))
 ----
 NULL [1, 0]
 
+query ??
+select array_dims(arrow_cast(make_array(), 'LargeList(Null)')), array_dims(arrow_cast(make_array(make_array()), 'LargeList(List(Null))'))
+----
+NULL [1, 0]
+
 # list_dims scalar function #4 (function alias `array_dims`)
 query ???
 select list_dims(make_array(1, 2, 3)), list_dims(make_array([1, 2], [3, 4])), list_dims(make_array([[[[1], [2]]]]));
 ----
 [3] [2, 2] [1, 1, 1, 2, 1]
 
+query ???
+select list_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))'));
+----
+[3] [2, 2] [1, 1, 1, 2, 1]
+
 # array_dims with columns
 query ???
 select array_dims(column1), array_dims(column2), array_dims(column3) from arrays;
@@ -2860,6 +2884,18 @@ NULL [3] [4]
 [2, 2] NULL [1]
 [2, 2] [3] NULL
 
+query ???
+select array_dims(column1), array_dims(column2), array_dims(column3) from large_arrays;
+----
+[2, 2] [3] [5]
+[2, 2] [3] [5]
+[2, 2] [3] [5]
+[2, 2] [3] [3]
+NULL [3] [4]
+[2, 2] NULL [1]
+[2, 2] [3] NULL
+
+
 ## array_ndims (aliases: `list_ndims`)
 
 # array_ndims scalar function #1
@@ -3768,6 +3804,9 @@ drop table nested_arrays;
 statement ok
 drop table arrays;
 
+statement ok
+drop table large_arrays;
+
 statement ok
 drop table slices;