You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/12/20 21:11:37 UTC
(arrow-datafusion) branch main updated: feat: support `LargeList` in `array_dims` (#8592)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 98a5a4eb1e feat: support `LargeList` in `array_dims` (#8592)
98a5a4eb1e is described below
commit 98a5a4eb1ea1277f5fe001e1c7602b37592452f1
Author: Alex Huang <hu...@gmail.com>
AuthorDate: Wed Dec 20 22:11:30 2023 +0100
feat: support `LargeList` in `array_dims` (#8592)
* support LargeList in array_dims
* drop table
* add argument check
---
datafusion/physical-expr/src/array_expressions.rs | 31 +++++++++++++---
datafusion/sqllogictest/test_files/array.slt | 43 +++++++++++++++++++++--
2 files changed, 67 insertions(+), 7 deletions(-)
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index 0a76319188..bdab65cab9 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -1925,12 +1925,33 @@ pub fn array_length(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Array_dims SQL function
pub fn array_dims(args: &[ArrayRef]) -> Result<ArrayRef> {
- let list_array = as_list_array(&args[0])?;
+ if args.len() != 1 {
+ return exec_err!("array_dims needs one argument");
+ }
+
+ let data = match args[0].data_type() {
+ DataType::List(_) => {
+ let array = as_list_array(&args[0])?;
+ array
+ .iter()
+ .map(compute_array_dims)
+ .collect::<Result<Vec<_>>>()?
+ }
+ DataType::LargeList(_) => {
+ let array = as_large_list_array(&args[0])?;
+ array
+ .iter()
+ .map(compute_array_dims)
+ .collect::<Result<Vec<_>>>()?
+ }
+ _ => {
+ return exec_err!(
+ "array_dims does not support type '{:?}'",
+ args[0].data_type()
+ );
+ }
+ };
- let data = list_array
- .iter()
- .map(compute_array_dims)
- .collect::<Result<Vec<_>>>()?;
let result = ListArray::from_iter_primitive::<UInt64Type, _, _>(data);
Ok(Arc::new(result) as ArrayRef)
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index b38f73ecb8..ca33f08de0 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -67,6 +67,16 @@ AS VALUES
(make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 18.8), NULL)
;
+statement ok
+CREATE TABLE large_arrays
+AS
+ SELECT
+ arrow_cast(column1, 'LargeList(List(Int64))') AS column1,
+ arrow_cast(column2, 'LargeList(Float64)') AS column2,
+ arrow_cast(column3, 'LargeList(Utf8)') AS column3
+ FROM arrays
+;
+
statement ok
CREATE TABLE slices
AS VALUES
@@ -2820,8 +2830,7 @@ NULL 10
## array_dims (aliases: `list_dims`)
# array dims error
-# TODO this is a separate bug
-query error Internal error: could not cast value to arrow_array::array::list_array::GenericListArray<i32>\.
+query error Execution error: array_dims does not support type 'Int64'
select array_dims(1);
# array_dims scalar function
@@ -2830,6 +2839,11 @@ select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])),
----
[3] [2, 2] [1, 1, 1, 2, 1]
+query ???
+select array_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))'));
+----
+[3] [2, 2] [1, 1, 1, 2, 1]
+
# array_dims scalar function #2
query ??
select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)), array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2));
@@ -2842,12 +2856,22 @@ select array_dims(make_array()), array_dims(make_array(make_array()))
----
NULL [1, 0]
+query ??
+select array_dims(arrow_cast(make_array(), 'LargeList(Null)')), array_dims(arrow_cast(make_array(make_array()), 'LargeList(List(Null))'))
+----
+NULL [1, 0]
+
# list_dims scalar function #4 (function alias `array_dims`)
query ???
select list_dims(make_array(1, 2, 3)), list_dims(make_array([1, 2], [3, 4])), list_dims(make_array([[[[1], [2]]]]));
----
[3] [2, 2] [1, 1, 1, 2, 1]
+query ???
+select list_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'LargeList(List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'LargeList(List(List(List(List(Int64)))))'));
+----
+[3] [2, 2] [1, 1, 1, 2, 1]
+
# array_dims with columns
query ???
select array_dims(column1), array_dims(column2), array_dims(column3) from arrays;
@@ -2860,6 +2884,18 @@ NULL [3] [4]
[2, 2] NULL [1]
[2, 2] [3] NULL
+query ???
+select array_dims(column1), array_dims(column2), array_dims(column3) from large_arrays;
+----
+[2, 2] [3] [5]
+[2, 2] [3] [5]
+[2, 2] [3] [5]
+[2, 2] [3] [3]
+NULL [3] [4]
+[2, 2] NULL [1]
+[2, 2] [3] NULL
+
+
## array_ndims (aliases: `list_ndims`)
# array_ndims scalar function #1
@@ -3768,6 +3804,9 @@ drop table nested_arrays;
statement ok
drop table arrays;
+statement ok
+drop table large_arrays;
+
statement ok
drop table slices;