You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2024/01/31 11:33:45 UTC
(arrow-datafusion) branch main updated: feat: support array_reverse (#9023)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 15f59d9861 feat: support array_reverse (#9023)
15f59d9861 is described below
commit 15f59d9861082a4d5d39bddce63d81cc7b9fb299
Author: Alex Huang <hu...@gmail.com>
AuthorDate: Wed Jan 31 19:33:40 2024 +0800
feat: support array_reverse (#9023)
* support array_reverse
* fix typo
* add test
* fix NULL
* fix parse_expr
* fix typo
* fix null in column
* fix null
* add md
* fix ci
* add test for fixedsizelist
* skip null and speed up
* fix fmt
* fix clippy
* reduce code complex
---
datafusion/expr/src/built_in_function.rs | 6 ++
datafusion/expr/src/expr_fn.rs | 6 ++
datafusion/physical-expr/src/array_expressions.rs | 69 +++++++++++++++++++++++
datafusion/physical-expr/src/functions.rs | 3 +
datafusion/proto/proto/datafusion.proto | 1 +
datafusion/proto/src/generated/pbjson.rs | 3 +
datafusion/proto/src/generated/prost.rs | 3 +
datafusion/proto/src/logical_plan/from_proto.rs | 9 ++-
datafusion/proto/src/logical_plan/to_proto.rs | 1 +
datafusion/sqllogictest/test_files/array.slt | 34 +++++++++++
docs/source/user-guide/sql/scalar_functions.md | 33 +++++++++++
11 files changed, 167 insertions(+), 1 deletion(-)
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index f2eb82ebf9..b7bb17c86b 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -171,6 +171,8 @@ pub enum BuiltinScalarFunction {
ArrayReplaceN,
/// array_replace_all
ArrayReplaceAll,
+ /// array_reverse
+ ArrayReverse,
/// array_slice
ArraySlice,
/// array_to_string
@@ -427,6 +429,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplace => Volatility::Immutable,
BuiltinScalarFunction::ArrayReplaceN => Volatility::Immutable,
BuiltinScalarFunction::ArrayReplaceAll => Volatility::Immutable,
+ BuiltinScalarFunction::ArrayReverse => Volatility::Immutable,
BuiltinScalarFunction::Flatten => Volatility::Immutable,
BuiltinScalarFunction::ArraySlice => Volatility::Immutable,
BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
@@ -622,6 +625,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplace => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayReplaceN => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayReplaceAll => Ok(input_expr_types[0].clone()),
+ BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArraySlice => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayResize => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayToString => Ok(Utf8),
@@ -961,6 +965,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplaceAll => {
Signature::any(3, self.volatility())
}
+ BuiltinScalarFunction::ArrayReverse => Signature::any(1, self.volatility()),
BuiltinScalarFunction::ArraySlice => {
Signature::variadic_any(self.volatility())
}
@@ -1567,6 +1572,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplaceAll => {
&["array_replace_all", "list_replace_all"]
}
+ BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"],
BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"],
BuiltinScalarFunction::ArrayToString => &[
"array_to_string",
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 4608badde2..877066aabf 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -728,6 +728,12 @@ scalar_expr!(
array from to,
"replaces all occurrences of the specified element with another specified element."
);
+scalar_expr!(
+ ArrayReverse,
+ array_reverse,
+ array,
+ "reverses the order of elements in the array."
+);
scalar_expr!(
ArraySlice,
array_slice,
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index a3dec2762c..844dae0917 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -2766,6 +2766,75 @@ where
)?))
}
+/// array_reverse SQL function
+pub fn array_reverse(arg: &[ArrayRef]) -> Result<ArrayRef> {
+ if arg.len() != 1 {
+ return exec_err!("array_reverse needs one argument");
+ }
+
+ match &arg[0].data_type() {
+ DataType::List(field) => {
+ let array = as_list_array(&arg[0])?;
+ general_array_reverse::<i32>(array, field)
+ }
+ DataType::LargeList(field) => {
+ let array = as_large_list_array(&arg[0])?;
+ general_array_reverse::<i64>(array, field)
+ }
+ DataType::Null => Ok(arg[0].clone()),
+ array_type => exec_err!("array_reverse does not support type '{array_type:?}'."),
+ }
+}
+
+fn general_array_reverse<O: OffsetSizeTrait>(
+ array: &GenericListArray<O>,
+ field: &FieldRef,
+) -> Result<ArrayRef>
+where
+ O: TryFrom<i64>,
+{
+ let values = array.values();
+ let original_data = values.to_data();
+ let capacity = Capacities::Array(original_data.len());
+ let mut offsets = vec![O::usize_as(0)];
+ let mut nulls = vec![];
+ let mut mutable =
+ MutableArrayData::with_capacities(vec![&original_data], false, capacity);
+
+ for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
+ // skip the null value
+ if array.is_null(row_index) {
+ nulls.push(false);
+ offsets.push(offsets[row_index] + O::one());
+ mutable.extend(0, 0, 1);
+ continue;
+ } else {
+ nulls.push(true);
+ }
+
+ let start = offset_window[0];
+ let end = offset_window[1];
+
+ let mut index = end - O::one();
+ let mut cnt = 0;
+
+ while index >= start {
+ mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1);
+ index = index - O::one();
+ cnt += 1;
+ }
+ offsets.push(offsets[row_index] + O::usize_as(cnt));
+ }
+
+ let data = mutable.freeze();
+ Ok(Arc::new(GenericListArray::<O>::try_new(
+ field.clone(),
+ OffsetBuffer::<O>::new(offsets.into()),
+ arrow_array::make_array(data),
+ Some(nulls.into()),
+ )?))
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index cd4e6f96f0..21eaeab721 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -445,6 +445,9 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayReplaceAll => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_replace_all)(args)
}),
+ BuiltinScalarFunction::ArrayReverse => Arc::new(|args| {
+ make_scalar_function_inner(array_expressions::array_reverse)(args)
+ }),
BuiltinScalarFunction::ArraySlice => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_slice)(args)
}),
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index 0b93820db8..f2b5c5dd42 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -670,6 +670,7 @@ enum ScalarFunction {
EndsWith = 131;
InStr = 132;
MakeDate = 133;
+ ArrayReverse = 134;
}
message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index 55e83a8853..b9a8c5fc07 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -22422,6 +22422,7 @@ impl serde::Serialize for ScalarFunction {
Self::EndsWith => "EndsWith",
Self::InStr => "InStr",
Self::MakeDate => "MakeDate",
+ Self::ArrayReverse => "ArrayReverse",
};
serializer.serialize_str(variant)
}
@@ -22565,6 +22566,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"EndsWith",
"InStr",
"MakeDate",
+ "ArrayReverse",
];
struct GeneratedVisitor;
@@ -22737,6 +22739,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
"EndsWith" => Ok(ScalarFunction::EndsWith),
"InStr" => Ok(ScalarFunction::InStr),
"MakeDate" => Ok(ScalarFunction::MakeDate),
+ "ArrayReverse" => Ok(ScalarFunction::ArrayReverse),
_ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
}
}
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index b17bcd3a49..758ef2dcb5 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2765,6 +2765,7 @@ pub enum ScalarFunction {
EndsWith = 131,
InStr = 132,
MakeDate = 133,
+ ArrayReverse = 134,
}
impl ScalarFunction {
/// String value of the enum field names used in the ProtoBuf definition.
@@ -2905,6 +2906,7 @@ impl ScalarFunction {
ScalarFunction::EndsWith => "EndsWith",
ScalarFunction::InStr => "InStr",
ScalarFunction::MakeDate => "MakeDate",
+ ScalarFunction::ArrayReverse => "ArrayReverse",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
@@ -3042,6 +3044,7 @@ impl ScalarFunction {
"EndsWith" => Some(Self::EndsWith),
"InStr" => Some(Self::InStr),
"MakeDate" => Some(Self::MakeDate),
+ "ArrayReverse" => Some(Self::ArrayReverse),
_ => None,
}
}
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index b025f79bd1..decf3b1874 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -44,7 +44,6 @@ use datafusion_common::{
Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference,
Result, ScalarValue,
};
-use datafusion_expr::expr::{Alias, Placeholder};
use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by};
use datafusion_expr::{
abs, acos, acosh, array, array_append, array_concat, array_dims, array_distinct,
@@ -72,6 +71,10 @@ use datafusion_expr::{
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound,
WindowFrameUnits,
};
+use datafusion_expr::{
+ array_reverse,
+ expr::{Alias, Placeholder},
+};
#[derive(Debug)]
pub enum Error {
@@ -501,6 +504,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::ArrayReplace => Self::ArrayReplace,
ScalarFunction::ArrayReplaceN => Self::ArrayReplaceN,
ScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
+ ScalarFunction::ArrayReverse => Self::ArrayReverse,
ScalarFunction::ArraySlice => Self::ArraySlice,
ScalarFunction::ArrayToString => Self::ArrayToString,
ScalarFunction::ArrayIntersect => Self::ArrayIntersect,
@@ -1458,6 +1462,9 @@ pub fn parse_expr(
parse_expr(&args[1], registry)?,
parse_expr(&args[2], registry)?,
)),
+ ScalarFunction::ArrayReverse => {
+ Ok(array_reverse(parse_expr(&args[0], registry)?))
+ }
ScalarFunction::ArraySlice => Ok(array_slice(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index f7be15136b..e094994840 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1500,6 +1500,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::ArrayReplace => Self::ArrayReplace,
BuiltinScalarFunction::ArrayReplaceN => Self::ArrayReplaceN,
BuiltinScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
+ BuiltinScalarFunction::ArrayReverse => Self::ArrayReverse,
BuiltinScalarFunction::ArraySlice => Self::ArraySlice,
BuiltinScalarFunction::ArrayToString => Self::ArrayToString,
BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect,
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index e072e4146f..e6a8181be1 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -5054,6 +5054,40 @@ select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [
----
[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]]
+## array_reverse
+query ??
+select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1));
+----
+[3, 2, 1] [1]
+
+query ??
+select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)'));
+----
+[3, 2, 1] [1]
+
+#TODO: support after FixedSizeList type coercion
+#query ??
+#select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
+#----
+#[3, 2, 1] [1]
+
+query ??
+select array_reverse(NULL), array_reverse([]);
+----
+NULL []
+
+query ??
+select array_reverse(column1), column1 from arrays_values;
+----
+[10, 9, 8, 7, 6, 5, 4, 3, 2, ] [, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+[20, , 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, , 20]
+[30, 29, 28, 27, 26, 25, , 23, 22, 21] [21, 22, 23, , 25, 26, 27, 28, 29, 30]
+[40, 39, 38, 37, , 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, , 37, 38, 39, 40]
+NULL NULL
+[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
+[60, 59, 58, 57, 56, 55, 54, , 52, 51] [51, 52, , 54, 55, 56, 57, 58, 59, 60]
+[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
+
### Delete tables
statement ok
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index 7bec80b55e..ba69b53e69 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1793,6 +1793,7 @@ from_unixtime(expression)
- [array_replace](#array_replace)
- [array_replace_n](#array_replace_n)
- [array_replace_all](#array_replace_all)
+- [array_reverse](#array_reverse)
- [array_slice](#array_slice)
- [array_to_string](#array_to_string)
- [cardinality](#cardinality)
@@ -2523,6 +2524,34 @@ array_replace_all(array, from, to)
- list_replace_all
+### `array_reverse`
+
+Returns the array with the order of the elements reversed.
+
+```
+array_reverse(array)
+```
+
+#### Arguments
+
+- **array**: Array expression.
+ Can be a constant, column, or function, and any combination of array operators.
+
+#### Example
+
+```
+❯ select array_reverse([1, 2, 3, 4]);
++------------------------------------------------------------+
+| array_reverse(List([1, 2, 3, 4])) |
++------------------------------------------------------------+
+| [4, 3, 2, 1] |
++------------------------------------------------------------+
+```
+
+#### Aliases
+
+- list_reverse
+
### `array_slice`
Returns a slice of the array based on 1-indexed start and end positions.
@@ -2823,6 +2852,10 @@ _Alias of [array_replace_n](#array_replace_n)._
_Alias of [array_replace_all](#array_replace_all)._
+### `list_reverse`
+
+_Alias of [array_reverse](#array_reverse)._
+
### `list_slice`
_Alias of [array_slice](#array_slice)._