You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/09 13:16:43 UTC
[arrow-datafusion] 01/03: move expr functions to datafusion-expr expr_fn
This is an automated email from the ASF dual-hosted git repository.
jiayuliu pushed a commit to branch physical-plan
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
commit 73968992f0bd0d40299b9b243cca3d807ac97cb6
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Wed Feb 9 13:08:39 2022 +0800
move expr functions to datafusion-expr expr_fn
---
datafusion-expr/src/expr.rs | 25 ++
datafusion-expr/src/expr_fn.rs | 388 ++++++++++++++++++++++++++++-
datafusion-expr/src/lib.rs | 4 +-
datafusion-expr/src/operator.rs | 29 +++
datafusion/src/logical_plan/expr.rs | 414 +------------------------------
datafusion/src/logical_plan/operators.rs | 29 ---
6 files changed, 450 insertions(+), 439 deletions(-)
diff --git a/datafusion-expr/src/expr.rs b/datafusion-expr/src/expr.rs
index f26f1df..e998ebb 100644
--- a/datafusion-expr/src/expr.rs
+++ b/datafusion-expr/src/expr.rs
@@ -696,3 +696,28 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
)),
}
}
+
+#[cfg(test)]
+mod test {
+ use crate::expr_fn::col;
+ use crate::lit;
+
+ #[test]
+ fn test_not() {
+ assert_eq!(lit(1).not(), !lit(1));
+ }
+
+ #[test]
+ fn test_partial_ord() {
+ // Test validates that partial ord is defined for Expr using hashes, not
+ // intended to exhaustively test all possibilities
+ let exp1 = col("a") + lit(1);
+ let exp2 = col("a") + lit(2);
+ let exp3 = !(col("a") + lit(2));
+
+ assert!(exp1 < exp2);
+ assert!(exp2 > exp1);
+ assert!(exp2 > exp3);
+ assert!(exp3 < exp2);
+ }
+}
diff --git a/datafusion-expr/src/expr_fn.rs b/datafusion-expr/src/expr_fn.rs
index 469a82d..2c3a1f4 100644
--- a/datafusion-expr/src/expr_fn.rs
+++ b/datafusion-expr/src/expr_fn.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::{Expr, Operator};
+use crate::{aggregate_function, built_in_function, lit, Expr, Operator};
/// Create a column expression based on a qualified or unqualified column name
pub fn col(ident: &str) -> Expr {
@@ -30,3 +30,389 @@ pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr {
right: Box::new(r),
}
}
+
+/// return a new expression with a logical AND
+pub fn and(left: Expr, right: Expr) -> Expr {
+ Expr::BinaryExpr {
+ left: Box::new(left),
+ op: Operator::And,
+ right: Box::new(right),
+ }
+}
+
+/// return a new expression with a logical OR
+pub fn or(left: Expr, right: Expr) -> Expr {
+ Expr::BinaryExpr {
+ left: Box::new(left),
+ op: Operator::Or,
+ right: Box::new(right),
+ }
+}
+
+/// Create an expression to represent the min() aggregate function
+pub fn min(expr: Expr) -> Expr {
+ Expr::AggregateFunction {
+ fun: aggregate_function::AggregateFunction::Min,
+ distinct: false,
+ args: vec![expr],
+ }
+}
+
+/// Create an expression to represent the max() aggregate function
+pub fn max(expr: Expr) -> Expr {
+ Expr::AggregateFunction {
+ fun: aggregate_function::AggregateFunction::Max,
+ distinct: false,
+ args: vec![expr],
+ }
+}
+
+/// Create an expression to represent the sum() aggregate function
+pub fn sum(expr: Expr) -> Expr {
+ Expr::AggregateFunction {
+ fun: aggregate_function::AggregateFunction::Sum,
+ distinct: false,
+ args: vec![expr],
+ }
+}
+
+/// Create an expression to represent the avg() aggregate function
+pub fn avg(expr: Expr) -> Expr {
+ Expr::AggregateFunction {
+ fun: aggregate_function::AggregateFunction::Avg,
+ distinct: false,
+ args: vec![expr],
+ }
+}
+
+/// Create an expression to represent the count() aggregate function
+pub fn count(expr: Expr) -> Expr {
+ Expr::AggregateFunction {
+ fun: aggregate_function::AggregateFunction::Count,
+ distinct: false,
+ args: vec![expr],
+ }
+}
+
+/// Create an expression to represent the count(distinct) aggregate function
+pub fn count_distinct(expr: Expr) -> Expr {
+ Expr::AggregateFunction {
+ fun: aggregate_function::AggregateFunction::Count,
+ distinct: true,
+ args: vec![expr],
+ }
+}
+
+/// Create an in_list expression
+pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
+ Expr::InList {
+ expr: Box::new(expr),
+ list,
+ negated,
+ }
+}
+
+/// Concatenates the text representations of all the arguments. NULL arguments are ignored.
+pub fn concat(args: &[Expr]) -> Expr {
+ Expr::ScalarFunction {
+ fun: built_in_function::BuiltinScalarFunction::Concat,
+ args: args.to_vec(),
+ }
+}
+
+/// Concatenates all but the first argument, with separators.
+/// The first argument is used as the separator string, and should not be NULL.
+/// Other NULL arguments are ignored.
+pub fn concat_ws(sep: impl Into<String>, values: &[Expr]) -> Expr {
+ let mut args = vec![lit(sep.into())];
+ args.extend_from_slice(values);
+ Expr::ScalarFunction {
+ fun: built_in_function::BuiltinScalarFunction::ConcatWithSeparator,
+ args,
+ }
+}
+
+/// Returns a random value in the range 0.0 <= x < 1.0
+pub fn random() -> Expr {
+ Expr::ScalarFunction {
+ fun: built_in_function::BuiltinScalarFunction::Random,
+ args: vec![],
+ }
+}
+
+/// Returns the approximate number of distinct input values.
+/// This function provides an approximation of count(DISTINCT x).
+/// Zero is returned if all input values are null.
+/// This function should produce a standard error of 0.81%,
+/// which is the standard deviation of the (approximately normal)
+/// error distribution over all possible sets.
+/// It does not guarantee an upper bound on the error for any specific input set.
+pub fn approx_distinct(expr: Expr) -> Expr {
+ Expr::AggregateFunction {
+ fun: aggregate_function::AggregateFunction::ApproxDistinct,
+ distinct: false,
+ args: vec![expr],
+ }
+}
+
+/// Calculate an approximation of the specified `percentile` for `expr`.
+pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr {
+ Expr::AggregateFunction {
+ fun: aggregate_function::AggregateFunction::ApproxPercentileCont,
+ distinct: false,
+ args: vec![expr, percentile],
+ }
+}
+
+// TODO(kszucs): this seems buggy, unary_scalar_expr! is used for
+// functions of varying arity
+/// Create a convenience function representing a unary scalar function
+macro_rules! unary_scalar_expr {
+ ($ENUM:ident, $FUNC:ident) => {
+ #[doc = concat!("Unary scalar function definition for ", stringify!($FUNC) ) ]
+ pub fn $FUNC(e: Expr) -> Expr {
+ Expr::ScalarFunction {
+ fun: built_in_function::BuiltinScalarFunction::$ENUM,
+ args: vec![e],
+ }
+ }
+ };
+}
+
+macro_rules! scalar_expr {
+ ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
+ #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
+ pub fn $FUNC($($arg: Expr),*) -> Expr {
+ Expr::ScalarFunction {
+ fun: built_in_function::BuiltinScalarFunction::$ENUM,
+ args: vec![$($arg),*],
+ }
+ }
+ };
+}
+
+macro_rules! nary_scalar_expr {
+ ($ENUM:ident, $FUNC:ident) => {
+ #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
+ pub fn $FUNC(args: Vec<Expr>) -> Expr {
+ Expr::ScalarFunction {
+ fun: built_in_function::BuiltinScalarFunction::$ENUM,
+ args,
+ }
+ }
+ };
+}
+
+// generate methods for creating the supported unary/binary expressions
+
+// math functions
+unary_scalar_expr!(Sqrt, sqrt);
+unary_scalar_expr!(Sin, sin);
+unary_scalar_expr!(Cos, cos);
+unary_scalar_expr!(Tan, tan);
+unary_scalar_expr!(Asin, asin);
+unary_scalar_expr!(Acos, acos);
+unary_scalar_expr!(Atan, atan);
+unary_scalar_expr!(Floor, floor);
+unary_scalar_expr!(Ceil, ceil);
+unary_scalar_expr!(Now, now);
+unary_scalar_expr!(Round, round);
+unary_scalar_expr!(Trunc, trunc);
+unary_scalar_expr!(Abs, abs);
+unary_scalar_expr!(Signum, signum);
+unary_scalar_expr!(Exp, exp);
+unary_scalar_expr!(Log2, log2);
+unary_scalar_expr!(Log10, log10);
+unary_scalar_expr!(Ln, ln);
+
+// string functions
+scalar_expr!(Ascii, ascii, string);
+scalar_expr!(BitLength, bit_length, string);
+nary_scalar_expr!(Btrim, btrim);
+scalar_expr!(CharacterLength, character_length, string);
+scalar_expr!(CharacterLength, length, string);
+scalar_expr!(Chr, chr, string);
+scalar_expr!(Digest, digest, string, algorithm);
+scalar_expr!(InitCap, initcap, string);
+scalar_expr!(Left, left, string, count);
+scalar_expr!(Lower, lower, string);
+nary_scalar_expr!(Lpad, lpad);
+scalar_expr!(Ltrim, ltrim, string);
+scalar_expr!(MD5, md5, string);
+scalar_expr!(OctetLength, octet_length, string);
+nary_scalar_expr!(RegexpMatch, regexp_match);
+nary_scalar_expr!(RegexpReplace, regexp_replace);
+scalar_expr!(Replace, replace, string, from, to);
+scalar_expr!(Repeat, repeat, string, count);
+scalar_expr!(Reverse, reverse, string);
+scalar_expr!(Right, right, string, count);
+nary_scalar_expr!(Rpad, rpad);
+scalar_expr!(Rtrim, rtrim, string);
+scalar_expr!(SHA224, sha224, string);
+scalar_expr!(SHA256, sha256, string);
+scalar_expr!(SHA384, sha384, string);
+scalar_expr!(SHA512, sha512, string);
+scalar_expr!(SplitPart, split_part, expr, delimiter, index);
+scalar_expr!(StartsWith, starts_with, string, characters);
+scalar_expr!(Strpos, strpos, string, substring);
+scalar_expr!(Substr, substr, string, position);
+scalar_expr!(ToHex, to_hex, string);
+scalar_expr!(Translate, translate, string, from, to);
+scalar_expr!(Trim, trim, string);
+scalar_expr!(Upper, upper, string);
+
+// date functions
+scalar_expr!(DatePart, date_part, part, date);
+scalar_expr!(DateTrunc, date_trunc, part, date);
+
+/// Returns a fixed-size array containing each of the given arguments.
+pub fn array(args: Vec<Expr>) -> Expr {
+ Expr::ScalarFunction {
+ fun: built_in_function::BuiltinScalarFunction::Array,
+ args,
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn filter_is_null_and_is_not_null() {
+ let col_null = col("col1");
+ let col_not_null = col("col2");
+ assert_eq!(format!("{:?}", col_null.is_null()), "#col1 IS NULL");
+ assert_eq!(
+ format!("{:?}", col_not_null.is_not_null()),
+ "#col2 IS NOT NULL"
+ );
+ }
+
+ macro_rules! test_unary_scalar_expr {
+ ($ENUM:ident, $FUNC:ident) => {{
+ if let Expr::ScalarFunction { fun, args } = $FUNC(col("tableA.a")) {
+ let name = built_in_function::BuiltinScalarFunction::$ENUM;
+ assert_eq!(name, fun);
+ assert_eq!(1, args.len());
+ } else {
+ assert!(false, "unexpected");
+ }
+ }};
+ }
+
+ macro_rules! test_scalar_expr {
+ ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
+ let expected = vec![$(stringify!($arg)),*];
+ let result = $FUNC(
+ $(
+ col(stringify!($arg.to_string()))
+ ),*
+ );
+ if let Expr::ScalarFunction { fun, args } = result {
+ let name = built_in_function::BuiltinScalarFunction::$ENUM;
+ assert_eq!(name, fun);
+ assert_eq!(expected.len(), args.len());
+ } else {
+ assert!(false, "unexpected: {:?}", result);
+ }
+ };
+ }
+
+ macro_rules! test_nary_scalar_expr {
+ ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
+ let expected = vec![$(stringify!($arg)),*];
+ let result = $FUNC(
+ vec![
+ $(
+ col(stringify!($arg.to_string()))
+ ),*
+ ]
+ );
+ if let Expr::ScalarFunction { fun, args } = result {
+ let name = built_in_function::BuiltinScalarFunction::$ENUM;
+ assert_eq!(name, fun);
+ assert_eq!(expected.len(), args.len());
+ } else {
+ assert!(false, "unexpected: {:?}", result);
+ }
+ };
+ }
+
+ #[test]
+ fn scalar_function_definitions() {
+ test_unary_scalar_expr!(Sqrt, sqrt);
+ test_unary_scalar_expr!(Sin, sin);
+ test_unary_scalar_expr!(Cos, cos);
+ test_unary_scalar_expr!(Tan, tan);
+ test_unary_scalar_expr!(Asin, asin);
+ test_unary_scalar_expr!(Acos, acos);
+ test_unary_scalar_expr!(Atan, atan);
+ test_unary_scalar_expr!(Floor, floor);
+ test_unary_scalar_expr!(Ceil, ceil);
+ test_unary_scalar_expr!(Now, now);
+ test_unary_scalar_expr!(Round, round);
+ test_unary_scalar_expr!(Trunc, trunc);
+ test_unary_scalar_expr!(Abs, abs);
+ test_unary_scalar_expr!(Signum, signum);
+ test_unary_scalar_expr!(Exp, exp);
+ test_unary_scalar_expr!(Log2, log2);
+ test_unary_scalar_expr!(Log10, log10);
+ test_unary_scalar_expr!(Ln, ln);
+
+ test_scalar_expr!(Ascii, ascii, input);
+ test_scalar_expr!(BitLength, bit_length, string);
+ test_nary_scalar_expr!(Btrim, btrim, string);
+ test_nary_scalar_expr!(Btrim, btrim, string, characters);
+ test_scalar_expr!(CharacterLength, character_length, string);
+ test_scalar_expr!(CharacterLength, length, string);
+ test_scalar_expr!(Chr, chr, string);
+ test_scalar_expr!(Digest, digest, string, algorithm);
+ test_scalar_expr!(InitCap, initcap, string);
+ test_scalar_expr!(Left, left, string, count);
+ test_scalar_expr!(Lower, lower, string);
+ test_nary_scalar_expr!(Lpad, lpad, string, count);
+ test_nary_scalar_expr!(Lpad, lpad, string, count, characters);
+ test_scalar_expr!(Ltrim, ltrim, string);
+ test_scalar_expr!(MD5, md5, string);
+ test_scalar_expr!(OctetLength, octet_length, string);
+ test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern);
+ test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern, flags);
+ test_nary_scalar_expr!(
+ RegexpReplace,
+ regexp_replace,
+ string,
+ pattern,
+ replacement
+ );
+ test_nary_scalar_expr!(
+ RegexpReplace,
+ regexp_replace,
+ string,
+ pattern,
+ replacement,
+ flags
+ );
+ test_scalar_expr!(Replace, replace, string, from, to);
+ test_scalar_expr!(Repeat, repeat, string, count);
+ test_scalar_expr!(Reverse, reverse, string);
+ test_scalar_expr!(Right, right, string, count);
+ test_nary_scalar_expr!(Rpad, rpad, string, count);
+ test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
+ test_scalar_expr!(Rtrim, rtrim, string);
+ test_scalar_expr!(SHA224, sha224, string);
+ test_scalar_expr!(SHA256, sha256, string);
+ test_scalar_expr!(SHA384, sha384, string);
+ test_scalar_expr!(SHA512, sha512, string);
+ test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
+ test_scalar_expr!(StartsWith, starts_with, string, characters);
+ test_scalar_expr!(Strpos, strpos, string, substring);
+ test_scalar_expr!(Substr, substr, string, position);
+ test_scalar_expr!(ToHex, to_hex, string);
+ test_scalar_expr!(Translate, translate, string, from, to);
+ test_scalar_expr!(Trim, trim, string);
+ test_scalar_expr!(Upper, upper, string);
+
+ test_scalar_expr!(DatePart, date_part, part, date);
+ test_scalar_expr!(DateTrunc, date_trunc, part, date);
+ }
+}
diff --git a/datafusion-expr/src/lib.rs b/datafusion-expr/src/lib.rs
index 709fa63..1d0837f 100644
--- a/datafusion-expr/src/lib.rs
+++ b/datafusion-expr/src/lib.rs
@@ -37,8 +37,8 @@ pub use columnar_value::{ColumnarValue, NullColumnarValue};
pub use expr::Expr;
pub use expr_fn::col;
pub use function::{
- AccumulatorFunctionImplementation, ReturnTypeFunction, ScalarFunctionImplementation,
- StateTypeFunction,
+ AccumulatorFunctionImplementation, ReturnTypeFunction, ScalarFunctionImplementation,
+ StateTypeFunction,
};
pub use literal::{lit, lit_timestamp_nano, Literal, TimestampLiteral};
pub use operator::Operator;
diff --git a/datafusion-expr/src/operator.rs b/datafusion-expr/src/operator.rs
index a1cad76..585627f 100644
--- a/datafusion-expr/src/operator.rs
+++ b/datafusion-expr/src/operator.rs
@@ -138,3 +138,32 @@ impl ops::Rem for Expr {
binary_expr(self, Operator::Modulo, rhs)
}
}
+
+#[cfg(test)]
+mod tests {
+ use crate::lit;
+
+ #[test]
+ fn test_operators() {
+ assert_eq!(
+ format!("{:?}", lit(1u32) + lit(2u32)),
+ "UInt32(1) + UInt32(2)"
+ );
+ assert_eq!(
+ format!("{:?}", lit(1u32) - lit(2u32)),
+ "UInt32(1) - UInt32(2)"
+ );
+ assert_eq!(
+ format!("{:?}", lit(1u32) * lit(2u32)),
+ "UInt32(1) * UInt32(2)"
+ );
+ assert_eq!(
+ format!("{:?}", lit(1u32) / lit(2u32)),
+ "UInt32(1) / UInt32(2)"
+ );
+ assert_eq!(
+ format!("{:?}", lit(1u32) % lit(2u32)),
+ "UInt32(1) % UInt32(2)"
+ );
+ }
+}
diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs
index de05298..ba853ee 100644
--- a/datafusion/src/logical_plan/expr.rs
+++ b/datafusion/src/logical_plan/expr.rs
@@ -19,15 +19,17 @@
//! such as `col = 5` or `SUM(col)`. See examples on the [`Expr`] struct.
pub use super::Operator;
-use crate::error::{DataFusionError, Result};
+use crate::error::Result;
use crate::logical_plan::ExprSchemable;
use crate::logical_plan::{DFField, DFSchema};
use crate::physical_plan::udaf::AggregateUDF;
-use crate::physical_plan::{aggregates, functions, udf::ScalarUDF};
+use crate::physical_plan::udf::ScalarUDF;
use arrow::datatypes::DataType;
+use datafusion_common::DataFusionError;
pub use datafusion_common::{Column, ExprSchema};
-pub use datafusion_expr::expr_fn::col;
+pub use datafusion_expr::expr_fn::*;
use datafusion_expr::AccumulatorFunctionImplementation;
+use datafusion_expr::BuiltinScalarFunction;
pub use datafusion_expr::Expr;
use datafusion_expr::StateTypeFunction;
pub use datafusion_expr::{lit, lit_timestamp_nano, Literal};
@@ -64,9 +66,7 @@ impl CaseBuilder {
pub fn end(&self) -> Result<Expr> {
self.build()
}
-}
-impl CaseBuilder {
fn build(&self) -> Result<Expr> {
// collect all "then" expressions
let mut then_expr = self.then_expr.clone();
@@ -127,15 +127,6 @@ pub fn when(when: Expr, then: Expr) -> CaseBuilder {
}
}
-/// return a new expression with a logical AND
-pub fn and(left: Expr, right: Expr) -> Expr {
- Expr::BinaryExpr {
- left: Box::new(left),
- op: Operator::And,
- right: Box::new(right),
- }
-}
-
/// Combines an array of filter expressions into a single filter expression
/// consisting of the input filter expressions joined with logical AND.
/// Returns None if the filters array is empty.
@@ -150,15 +141,6 @@ pub fn combine_filters(filters: &[Expr]) -> Option<Expr> {
Some(combined_filter)
}
-/// return a new expression with a logical OR
-pub fn or(left: Expr, right: Expr) -> Expr {
- Expr::BinaryExpr {
- left: Box::new(left),
- op: Operator::Or,
- right: Box::new(right),
- }
-}
-
/// Convert an expression into Column expression if it's already provided as input plan.
///
/// For example, it rewrites:
@@ -200,230 +182,6 @@ pub fn unalias(expr: Expr) -> Expr {
}
}
-/// Create an expression to represent the min() aggregate function
-pub fn min(expr: Expr) -> Expr {
- Expr::AggregateFunction {
- fun: aggregates::AggregateFunction::Min,
- distinct: false,
- args: vec![expr],
- }
-}
-
-/// Create an expression to represent the max() aggregate function
-pub fn max(expr: Expr) -> Expr {
- Expr::AggregateFunction {
- fun: aggregates::AggregateFunction::Max,
- distinct: false,
- args: vec![expr],
- }
-}
-
-/// Create an expression to represent the sum() aggregate function
-pub fn sum(expr: Expr) -> Expr {
- Expr::AggregateFunction {
- fun: aggregates::AggregateFunction::Sum,
- distinct: false,
- args: vec![expr],
- }
-}
-
-/// Create an expression to represent the avg() aggregate function
-pub fn avg(expr: Expr) -> Expr {
- Expr::AggregateFunction {
- fun: aggregates::AggregateFunction::Avg,
- distinct: false,
- args: vec![expr],
- }
-}
-
-/// Create an expression to represent the count() aggregate function
-pub fn count(expr: Expr) -> Expr {
- Expr::AggregateFunction {
- fun: aggregates::AggregateFunction::Count,
- distinct: false,
- args: vec![expr],
- }
-}
-
-/// Create an expression to represent the count(distinct) aggregate function
-pub fn count_distinct(expr: Expr) -> Expr {
- Expr::AggregateFunction {
- fun: aggregates::AggregateFunction::Count,
- distinct: true,
- args: vec![expr],
- }
-}
-
-/// Create an in_list expression
-pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
- Expr::InList {
- expr: Box::new(expr),
- list,
- negated,
- }
-}
-
-/// Concatenates the text representations of all the arguments. NULL arguments are ignored.
-pub fn concat(args: &[Expr]) -> Expr {
- Expr::ScalarFunction {
- fun: functions::BuiltinScalarFunction::Concat,
- args: args.to_vec(),
- }
-}
-
-/// Concatenates all but the first argument, with separators.
-/// The first argument is used as the separator string, and should not be NULL.
-/// Other NULL arguments are ignored.
-pub fn concat_ws(sep: impl Into<String>, values: &[Expr]) -> Expr {
- let mut args = vec![lit(sep.into())];
- args.extend_from_slice(values);
- Expr::ScalarFunction {
- fun: functions::BuiltinScalarFunction::ConcatWithSeparator,
- args,
- }
-}
-
-/// Returns a random value in the range 0.0 <= x < 1.0
-pub fn random() -> Expr {
- Expr::ScalarFunction {
- fun: functions::BuiltinScalarFunction::Random,
- args: vec![],
- }
-}
-
-/// Returns the approximate number of distinct input values.
-/// This function provides an approximation of count(DISTINCT x).
-/// Zero is returned if all input values are null.
-/// This function should produce a standard error of 0.81%,
-/// which is the standard deviation of the (approximately normal)
-/// error distribution over all possible sets.
-/// It does not guarantee an upper bound on the error for any specific input set.
-pub fn approx_distinct(expr: Expr) -> Expr {
- Expr::AggregateFunction {
- fun: aggregates::AggregateFunction::ApproxDistinct,
- distinct: false,
- args: vec![expr],
- }
-}
-
-/// Calculate an approximation of the specified `percentile` for `expr`.
-pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr {
- Expr::AggregateFunction {
- fun: aggregates::AggregateFunction::ApproxPercentileCont,
- distinct: false,
- args: vec![expr, percentile],
- }
-}
-
-// TODO(kszucs): this seems buggy, unary_scalar_expr! is used for many
-// varying arity functions
-/// Create an convenience function representing a unary scalar function
-macro_rules! unary_scalar_expr {
- ($ENUM:ident, $FUNC:ident) => {
- #[doc = concat!("Unary scalar function definition for ", stringify!($FUNC) ) ]
- pub fn $FUNC(e: Expr) -> Expr {
- Expr::ScalarFunction {
- fun: functions::BuiltinScalarFunction::$ENUM,
- args: vec![e],
- }
- }
- };
-}
-
-macro_rules! scalar_expr {
- ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
- #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
- pub fn $FUNC($($arg: Expr),*) -> Expr {
- Expr::ScalarFunction {
- fun: functions::BuiltinScalarFunction::$ENUM,
- args: vec![$($arg),*],
- }
- }
- };
-}
-
-macro_rules! nary_scalar_expr {
- ($ENUM:ident, $FUNC:ident) => {
- #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
- pub fn $FUNC(args: Vec<Expr>) -> Expr {
- Expr::ScalarFunction {
- fun: functions::BuiltinScalarFunction::$ENUM,
- args,
- }
- }
- };
-}
-
-// generate methods for creating the supported unary/binary expressions
-
-// math functions
-unary_scalar_expr!(Sqrt, sqrt);
-unary_scalar_expr!(Sin, sin);
-unary_scalar_expr!(Cos, cos);
-unary_scalar_expr!(Tan, tan);
-unary_scalar_expr!(Asin, asin);
-unary_scalar_expr!(Acos, acos);
-unary_scalar_expr!(Atan, atan);
-unary_scalar_expr!(Floor, floor);
-unary_scalar_expr!(Ceil, ceil);
-unary_scalar_expr!(Now, now);
-unary_scalar_expr!(Round, round);
-unary_scalar_expr!(Trunc, trunc);
-unary_scalar_expr!(Abs, abs);
-unary_scalar_expr!(Signum, signum);
-unary_scalar_expr!(Exp, exp);
-unary_scalar_expr!(Log2, log2);
-unary_scalar_expr!(Log10, log10);
-unary_scalar_expr!(Ln, ln);
-
-// string functions
-scalar_expr!(Ascii, ascii, string);
-scalar_expr!(BitLength, bit_length, string);
-nary_scalar_expr!(Btrim, btrim);
-scalar_expr!(CharacterLength, character_length, string);
-scalar_expr!(CharacterLength, length, string);
-scalar_expr!(Chr, chr, string);
-scalar_expr!(Digest, digest, string, algorithm);
-scalar_expr!(InitCap, initcap, string);
-scalar_expr!(Left, left, string, count);
-scalar_expr!(Lower, lower, string);
-nary_scalar_expr!(Lpad, lpad);
-scalar_expr!(Ltrim, ltrim, string);
-scalar_expr!(MD5, md5, string);
-scalar_expr!(OctetLength, octet_length, string);
-nary_scalar_expr!(RegexpMatch, regexp_match);
-nary_scalar_expr!(RegexpReplace, regexp_replace);
-scalar_expr!(Replace, replace, string, from, to);
-scalar_expr!(Repeat, repeat, string, count);
-scalar_expr!(Reverse, reverse, string);
-scalar_expr!(Right, right, string, count);
-nary_scalar_expr!(Rpad, rpad);
-scalar_expr!(Rtrim, rtrim, string);
-scalar_expr!(SHA224, sha224, string);
-scalar_expr!(SHA256, sha256, string);
-scalar_expr!(SHA384, sha384, string);
-scalar_expr!(SHA512, sha512, string);
-scalar_expr!(SplitPart, split_part, expr, delimiter, index);
-scalar_expr!(StartsWith, starts_with, string, characters);
-scalar_expr!(Strpos, strpos, string, substring);
-scalar_expr!(Substr, substr, string, position);
-scalar_expr!(ToHex, to_hex, string);
-scalar_expr!(Translate, translate, string, from, to);
-scalar_expr!(Trim, trim, string);
-scalar_expr!(Upper, upper, string);
-
-// date functions
-scalar_expr!(DatePart, date_part, part, date);
-scalar_expr!(DateTrunc, date_trunc, part, date);
-
-/// returns an array of fixed size with each argument on it.
-pub fn array(args: Vec<Expr>) -> Expr {
- Expr::ScalarFunction {
- fun: functions::BuiltinScalarFunction::Array,
- args,
- }
-}
-
/// Creates a new UDF with a specific signature and specific return type.
/// This is a helper function to create a new UDF.
/// The function `create_udf` returns a subset of all possible `ScalarFunction`:
@@ -483,7 +241,7 @@ pub fn exprlist_to_fields<'a>(
/// let expr = call_fn("sin", vec![col("x")]).unwrap().lt(lit(0.2));
/// ```
pub fn call_fn(name: impl AsRef<str>, args: Vec<Expr>) -> Result<Expr> {
- match name.as_ref().parse::<functions::BuiltinScalarFunction>() {
+ match name.as_ref().parse::<BuiltinScalarFunction>() {
Ok(fun) => Ok(Expr::ScalarFunction { fun, args }),
Err(e) => Err(e),
}
@@ -512,75 +270,9 @@ mod tests {
}
#[test]
- fn filter_is_null_and_is_not_null() {
- let col_null = col("col1");
- let col_not_null = col("col2");
- assert_eq!(format!("{:?}", col_null.is_null()), "#col1 IS NULL");
- assert_eq!(
- format!("{:?}", col_not_null.is_not_null()),
- "#col2 IS NOT NULL"
- );
- }
-
- #[test]
- fn test_not() {
- assert_eq!(lit(1).not(), !lit(1));
- }
-
- macro_rules! test_unary_scalar_expr {
- ($ENUM:ident, $FUNC:ident) => {{
- if let Expr::ScalarFunction { fun, args } = $FUNC(col("tableA.a")) {
- let name = functions::BuiltinScalarFunction::$ENUM;
- assert_eq!(name, fun);
- assert_eq!(1, args.len());
- } else {
- assert!(false, "unexpected");
- }
- }};
- }
-
- macro_rules! test_scalar_expr {
- ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
- let expected = vec![$(stringify!($arg)),*];
- let result = $FUNC(
- $(
- col(stringify!($arg.to_string()))
- ),*
- );
- if let Expr::ScalarFunction { fun, args } = result {
- let name = functions::BuiltinScalarFunction::$ENUM;
- assert_eq!(name, fun);
- assert_eq!(expected.len(), args.len());
- } else {
- assert!(false, "unexpected: {:?}", result);
- }
- };
- }
-
- macro_rules! test_nary_scalar_expr {
- ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
- let expected = vec![$(stringify!($arg)),*];
- let result = $FUNC(
- vec![
- $(
- col(stringify!($arg.to_string()))
- ),*
- ]
- );
- if let Expr::ScalarFunction { fun, args } = result {
- let name = functions::BuiltinScalarFunction::$ENUM;
- assert_eq!(name, fun);
- assert_eq!(expected.len(), args.len());
- } else {
- assert!(false, "unexpected: {:?}", result);
- }
- };
- }
-
- #[test]
fn digest_function_definitions() {
if let Expr::ScalarFunction { fun, args } = digest(col("tableA.a"), lit("md5")) {
- let name = functions::BuiltinScalarFunction::Digest;
+ let name = BuiltinScalarFunction::Digest;
assert_eq!(name, fun);
assert_eq!(2, args.len());
} else {
@@ -589,98 +281,6 @@ mod tests {
}
#[test]
- fn scalar_function_definitions() {
- test_unary_scalar_expr!(Sqrt, sqrt);
- test_unary_scalar_expr!(Sin, sin);
- test_unary_scalar_expr!(Cos, cos);
- test_unary_scalar_expr!(Tan, tan);
- test_unary_scalar_expr!(Asin, asin);
- test_unary_scalar_expr!(Acos, acos);
- test_unary_scalar_expr!(Atan, atan);
- test_unary_scalar_expr!(Floor, floor);
- test_unary_scalar_expr!(Ceil, ceil);
- test_unary_scalar_expr!(Now, now);
- test_unary_scalar_expr!(Round, round);
- test_unary_scalar_expr!(Trunc, trunc);
- test_unary_scalar_expr!(Abs, abs);
- test_unary_scalar_expr!(Signum, signum);
- test_unary_scalar_expr!(Exp, exp);
- test_unary_scalar_expr!(Log2, log2);
- test_unary_scalar_expr!(Log10, log10);
- test_unary_scalar_expr!(Ln, ln);
-
- test_scalar_expr!(Ascii, ascii, input);
- test_scalar_expr!(BitLength, bit_length, string);
- test_nary_scalar_expr!(Btrim, btrim, string);
- test_nary_scalar_expr!(Btrim, btrim, string, characters);
- test_scalar_expr!(CharacterLength, character_length, string);
- test_scalar_expr!(CharacterLength, length, string);
- test_scalar_expr!(Chr, chr, string);
- test_scalar_expr!(Digest, digest, string, algorithm);
- test_scalar_expr!(InitCap, initcap, string);
- test_scalar_expr!(Left, left, string, count);
- test_scalar_expr!(Lower, lower, string);
- test_nary_scalar_expr!(Lpad, lpad, string, count);
- test_nary_scalar_expr!(Lpad, lpad, string, count, characters);
- test_scalar_expr!(Ltrim, ltrim, string);
- test_scalar_expr!(MD5, md5, string);
- test_scalar_expr!(OctetLength, octet_length, string);
- test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern);
- test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern, flags);
- test_nary_scalar_expr!(
- RegexpReplace,
- regexp_replace,
- string,
- pattern,
- replacement
- );
- test_nary_scalar_expr!(
- RegexpReplace,
- regexp_replace,
- string,
- pattern,
- replacement,
- flags
- );
- test_scalar_expr!(Replace, replace, string, from, to);
- test_scalar_expr!(Repeat, repeat, string, count);
- test_scalar_expr!(Reverse, reverse, string);
- test_scalar_expr!(Right, right, string, count);
- test_nary_scalar_expr!(Rpad, rpad, string, count);
- test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
- test_scalar_expr!(Rtrim, rtrim, string);
- test_scalar_expr!(SHA224, sha224, string);
- test_scalar_expr!(SHA256, sha256, string);
- test_scalar_expr!(SHA384, sha384, string);
- test_scalar_expr!(SHA512, sha512, string);
- test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
- test_scalar_expr!(StartsWith, starts_with, string, characters);
- test_scalar_expr!(Strpos, strpos, string, substring);
- test_scalar_expr!(Substr, substr, string, position);
- test_scalar_expr!(ToHex, to_hex, string);
- test_scalar_expr!(Translate, translate, string, from, to);
- test_scalar_expr!(Trim, trim, string);
- test_scalar_expr!(Upper, upper, string);
-
- test_scalar_expr!(DatePart, date_part, part, date);
- test_scalar_expr!(DateTrunc, date_trunc, part, date);
- }
-
- #[test]
- fn test_partial_ord() {
- // Test validates that partial ord is defined for Expr using hashes, not
- // intended to exhaustively test all possibilities
- let exp1 = col("a") + lit(1);
- let exp2 = col("a") + lit(2);
- let exp3 = !(col("a") + lit(2));
-
- assert!(exp1 < exp2);
- assert!(exp2 > exp1);
- assert!(exp2 > exp3);
- assert!(exp3 < exp2);
- }
-
- #[test]
fn combine_zero_filters() {
let result = combine_filters(&[]);
assert_eq!(result, None);
diff --git a/datafusion/src/logical_plan/operators.rs b/datafusion/src/logical_plan/operators.rs
index 2f12928..132f8a8 100644
--- a/datafusion/src/logical_plan/operators.rs
+++ b/datafusion/src/logical_plan/operators.rs
@@ -16,32 +16,3 @@
// under the License.
pub use datafusion_expr::Operator;
-
-#[cfg(test)]
-mod tests {
- use crate::prelude::lit;
-
- #[test]
- fn test_operators() {
- assert_eq!(
- format!("{:?}", lit(1u32) + lit(2u32)),
- "UInt32(1) + UInt32(2)"
- );
- assert_eq!(
- format!("{:?}", lit(1u32) - lit(2u32)),
- "UInt32(1) - UInt32(2)"
- );
- assert_eq!(
- format!("{:?}", lit(1u32) * lit(2u32)),
- "UInt32(1) * UInt32(2)"
- );
- assert_eq!(
- format!("{:?}", lit(1u32) / lit(2u32)),
- "UInt32(1) / UInt32(2)"
- );
- assert_eq!(
- format!("{:?}", lit(1u32) % lit(2u32)),
- "UInt32(1) % UInt32(2)"
- );
- }
-}