You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2024/01/31 11:33:45 UTC

(arrow-datafusion) branch main updated: feat: support array_reverse (#9023)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 15f59d9861 feat: support array_reverse (#9023)
15f59d9861 is described below

commit 15f59d9861082a4d5d39bddce63d81cc7b9fb299
Author: Alex Huang <hu...@gmail.com>
AuthorDate: Wed Jan 31 19:33:40 2024 +0800

    feat: support array_reverse (#9023)
    
    * support array_reverse
    
    * fix typo
    
    * add test
    
    * fix NULL
    
    * fix parse_expr
    
    * fix typo
    
    * fix null in column
    
    * fix null
    
    * add md
    
    * fix ci
    
    * add test for fixedsizelist
    
    * skip null and speed up
    
    * fix fmt
    
    * fix clippy
    
    * reduce code complex
---
 datafusion/expr/src/built_in_function.rs          |  6 ++
 datafusion/expr/src/expr_fn.rs                    |  6 ++
 datafusion/physical-expr/src/array_expressions.rs | 69 +++++++++++++++++++++++
 datafusion/physical-expr/src/functions.rs         |  3 +
 datafusion/proto/proto/datafusion.proto           |  1 +
 datafusion/proto/src/generated/pbjson.rs          |  3 +
 datafusion/proto/src/generated/prost.rs           |  3 +
 datafusion/proto/src/logical_plan/from_proto.rs   |  9 ++-
 datafusion/proto/src/logical_plan/to_proto.rs     |  1 +
 datafusion/sqllogictest/test_files/array.slt      | 34 +++++++++++
 docs/source/user-guide/sql/scalar_functions.md    | 33 +++++++++++
 11 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index f2eb82ebf9..b7bb17c86b 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -171,6 +171,8 @@ pub enum BuiltinScalarFunction {
     ArrayReplaceN,
     /// array_replace_all
     ArrayReplaceAll,
+    /// array_reverse
+    ArrayReverse,
     /// array_slice
     ArraySlice,
     /// array_to_string
@@ -427,6 +429,7 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::ArrayReplace => Volatility::Immutable,
             BuiltinScalarFunction::ArrayReplaceN => Volatility::Immutable,
             BuiltinScalarFunction::ArrayReplaceAll => Volatility::Immutable,
+            BuiltinScalarFunction::ArrayReverse => Volatility::Immutable,
             BuiltinScalarFunction::Flatten => Volatility::Immutable,
             BuiltinScalarFunction::ArraySlice => Volatility::Immutable,
             BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
@@ -622,6 +625,7 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::ArrayReplace => Ok(input_expr_types[0].clone()),
             BuiltinScalarFunction::ArrayReplaceN => Ok(input_expr_types[0].clone()),
             BuiltinScalarFunction::ArrayReplaceAll => Ok(input_expr_types[0].clone()),
+            BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()),
             BuiltinScalarFunction::ArraySlice => Ok(input_expr_types[0].clone()),
             BuiltinScalarFunction::ArrayResize => Ok(input_expr_types[0].clone()),
             BuiltinScalarFunction::ArrayToString => Ok(Utf8),
@@ -961,6 +965,7 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::ArrayReplaceAll => {
                 Signature::any(3, self.volatility())
             }
+            BuiltinScalarFunction::ArrayReverse => Signature::any(1, self.volatility()),
             BuiltinScalarFunction::ArraySlice => {
                 Signature::variadic_any(self.volatility())
             }
@@ -1567,6 +1572,7 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::ArrayReplaceAll => {
                 &["array_replace_all", "list_replace_all"]
             }
+            BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"],
             BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"],
             BuiltinScalarFunction::ArrayToString => &[
                 "array_to_string",
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 4608badde2..877066aabf 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -728,6 +728,12 @@ scalar_expr!(
     array from to,
     "replaces all occurrences of the specified element with another specified element."
 );
+scalar_expr!(
+    ArrayReverse,
+    array_reverse,
+    array,
+    "reverses the order of elements in the array."
+);
 scalar_expr!(
     ArraySlice,
     array_slice,
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index a3dec2762c..844dae0917 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -2766,6 +2766,75 @@ where
     )?))
 }
 
+/// array_reverse SQL function
+pub fn array_reverse(arg: &[ArrayRef]) -> Result<ArrayRef> {
+    if arg.len() != 1 {
+        return exec_err!("array_reverse needs one argument");
+    }
+
+    match &arg[0].data_type() {
+        DataType::List(field) => {
+            let array = as_list_array(&arg[0])?;
+            general_array_reverse::<i32>(array, field)
+        }
+        DataType::LargeList(field) => {
+            let array = as_large_list_array(&arg[0])?;
+            general_array_reverse::<i64>(array, field)
+        }
+        DataType::Null => Ok(arg[0].clone()),
+        array_type => exec_err!("array_reverse does not support type '{array_type:?}'."),
+    }
+}
+
+fn general_array_reverse<O: OffsetSizeTrait>(
+    array: &GenericListArray<O>,
+    field: &FieldRef,
+) -> Result<ArrayRef>
+where
+    O: TryFrom<i64>,
+{
+    let values = array.values();
+    let original_data = values.to_data();
+    let capacity = Capacities::Array(original_data.len());
+    let mut offsets = vec![O::usize_as(0)];
+    let mut nulls = vec![];
+    let mut mutable =
+        MutableArrayData::with_capacities(vec![&original_data], false, capacity);
+
+    for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
+        // skip the null value
+        if array.is_null(row_index) {
+            nulls.push(false);
+            offsets.push(offsets[row_index] + O::one());
+            mutable.extend(0, 0, 1);
+            continue;
+        } else {
+            nulls.push(true);
+        }
+
+        let start = offset_window[0];
+        let end = offset_window[1];
+
+        let mut index = end - O::one();
+        let mut cnt = 0;
+
+        while index >= start {
+            mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1);
+            index = index - O::one();
+            cnt += 1;
+        }
+        offsets.push(offsets[row_index] + O::usize_as(cnt));
+    }
+
+    let data = mutable.freeze();
+    Ok(Arc::new(GenericListArray::<O>::try_new(
+        field.clone(),
+        OffsetBuffer::<O>::new(offsets.into()),
+        arrow_array::make_array(data),
+        Some(nulls.into()),
+    )?))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index cd4e6f96f0..21eaeab721 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -445,6 +445,9 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::ArrayReplaceAll => Arc::new(|args| {
             make_scalar_function_inner(array_expressions::array_replace_all)(args)
         }),
+        BuiltinScalarFunction::ArrayReverse => Arc::new(|args| {
+            make_scalar_function_inner(array_expressions::array_reverse)(args)
+        }),
         BuiltinScalarFunction::ArraySlice => Arc::new(|args| {
             make_scalar_function_inner(array_expressions::array_slice)(args)
         }),
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index 0b93820db8..f2b5c5dd42 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -670,6 +670,7 @@ enum ScalarFunction {
   EndsWith = 131;
   InStr = 132;
   MakeDate = 133;
+  ArrayReverse = 134;
 }
 
 message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index 55e83a8853..b9a8c5fc07 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22422,6 +22422,7 @@ impl serde::Serialize for ScalarFunction {
             Self::EndsWith => "EndsWith",
             Self::InStr => "InStr",
             Self::MakeDate => "MakeDate",
+            Self::ArrayReverse => "ArrayReverse",
         };
         serializer.serialize_str(variant)
     }
@@ -22565,6 +22566,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "EndsWith",
             "InStr",
             "MakeDate",
+            "ArrayReverse",
         ];
 
         struct GeneratedVisitor;
@@ -22737,6 +22739,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "EndsWith" => Ok(ScalarFunction::EndsWith),
                     "InStr" => Ok(ScalarFunction::InStr),
                     "MakeDate" => Ok(ScalarFunction::MakeDate),
+                    "ArrayReverse" => Ok(ScalarFunction::ArrayReverse),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index b17bcd3a49..758ef2dcb5 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2765,6 +2765,7 @@ pub enum ScalarFunction {
     EndsWith = 131,
     InStr = 132,
     MakeDate = 133,
+    ArrayReverse = 134,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2905,6 +2906,7 @@ impl ScalarFunction {
             ScalarFunction::EndsWith => "EndsWith",
             ScalarFunction::InStr => "InStr",
             ScalarFunction::MakeDate => "MakeDate",
+            ScalarFunction::ArrayReverse => "ArrayReverse",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -3042,6 +3044,7 @@ impl ScalarFunction {
             "EndsWith" => Some(Self::EndsWith),
             "InStr" => Some(Self::InStr),
             "MakeDate" => Some(Self::MakeDate),
+            "ArrayReverse" => Some(Self::ArrayReverse),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index b025f79bd1..decf3b1874 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -44,7 +44,6 @@ use datafusion_common::{
     Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference,
     Result, ScalarValue,
 };
-use datafusion_expr::expr::{Alias, Placeholder};
 use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by};
 use datafusion_expr::{
     abs, acos, acosh, array, array_append, array_concat, array_dims, array_distinct,
@@ -72,6 +71,10 @@ use datafusion_expr::{
     JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound,
     WindowFrameUnits,
 };
+use datafusion_expr::{
+    array_reverse,
+    expr::{Alias, Placeholder},
+};
 
 #[derive(Debug)]
 pub enum Error {
@@ -501,6 +504,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
             ScalarFunction::ArrayReplace => Self::ArrayReplace,
             ScalarFunction::ArrayReplaceN => Self::ArrayReplaceN,
             ScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
+            ScalarFunction::ArrayReverse => Self::ArrayReverse,
             ScalarFunction::ArraySlice => Self::ArraySlice,
             ScalarFunction::ArrayToString => Self::ArrayToString,
             ScalarFunction::ArrayIntersect => Self::ArrayIntersect,
@@ -1458,6 +1462,9 @@ pub fn parse_expr(
                     parse_expr(&args[1], registry)?,
                     parse_expr(&args[2], registry)?,
                 )),
+                ScalarFunction::ArrayReverse => {
+                    Ok(array_reverse(parse_expr(&args[0], registry)?))
+                }
                 ScalarFunction::ArraySlice => Ok(array_slice(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index f7be15136b..e094994840 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1500,6 +1500,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
             BuiltinScalarFunction::ArrayReplace => Self::ArrayReplace,
             BuiltinScalarFunction::ArrayReplaceN => Self::ArrayReplaceN,
             BuiltinScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
+            BuiltinScalarFunction::ArrayReverse => Self::ArrayReverse,
             BuiltinScalarFunction::ArraySlice => Self::ArraySlice,
             BuiltinScalarFunction::ArrayToString => Self::ArrayToString,
             BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect,
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index e072e4146f..e6a8181be1 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -5054,6 +5054,40 @@ select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [
 ----
 [[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]]
 
+## array_reverse
+query ??
+select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1));
+----
+[3, 2, 1] [1]
+
+query ??
+select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)'));
+----
+[3, 2, 1] [1]
+
+#TODO: support after FixedSizeList type coercion
+#query ??
+#select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
+#----
+#[3, 2, 1] [1]
+
+query ??
+select array_reverse(NULL), array_reverse([]);
+----
+NULL []
+
+query ??
+select array_reverse(column1), column1 from arrays_values;
+----
+[10, 9, 8, 7, 6, 5, 4, 3, 2, ] [, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+[20, , 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, , 20]
+[30, 29, 28, 27, 26, 25, , 23, 22, 21] [21, 22, 23, , 25, 26, 27, 28, 29, 30]
+[40, 39, 38, 37, , 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, , 37, 38, 39, 40]
+NULL NULL
+[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
+[60, 59, 58, 57, 56, 55, 54, , 52, 51] [51, 52, , 54, 55, 56, 57, 58, 59, 60]
+[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
+
 ### Delete tables
 
 statement ok
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index 7bec80b55e..ba69b53e69 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1793,6 +1793,7 @@ from_unixtime(expression)
 - [array_replace](#array_replace)
 - [array_replace_n](#array_replace_n)
 - [array_replace_all](#array_replace_all)
+- [array_reverse](#array_reverse)
 - [array_slice](#array_slice)
 - [array_to_string](#array_to_string)
 - [cardinality](#cardinality)
@@ -2523,6 +2524,34 @@ array_replace_all(array, from, to)
 
 - list_replace_all
 
+### `array_reverse`
+
+Returns the array with the order of the elements reversed.
+
+```
+array_reverse(array)
+```
+
+#### Arguments
+
+- **array**: Array expression.
+  Can be a constant, column, or function, and any combination of array operators.
+
+#### Example
+
+```
+❯ select array_reverse([1, 2, 3, 4]);
++------------------------------------------------------------+
+| array_reverse(List([1, 2, 3, 4]))                          |
++------------------------------------------------------------+
+| [4, 3, 2, 1]                                               |
++------------------------------------------------------------+
+```
+
+#### Aliases
+
+- list_reverse
+
 ### `array_slice`
 
 Returns a slice of the array based on 1-indexed start and end positions.
@@ -2823,6 +2852,10 @@ _Alias of [array_replace_n](#array_replace_n)._
 
 _Alias of [array_replace_all](#array_replace_all)._
 
+### `list_reverse`
+
+_Alias of [array_reverse](#array_reverse)._
+
 ### `list_slice`
 
 _Alias of [array_slice](#array_slice)._