You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/09 05:09:00 UTC

[arrow-datafusion] 01/01: move expr functions to datafusion-expr expr_fn

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch datafusion-expr-fn
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 8375cf6d1e0cfdb53de634bb9e035d7d75b5ebd8
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Wed Feb 9 13:08:39 2022 +0800

    move expr functions to datafusion-expr expr_fn
---
 datafusion-expr/src/expr.rs              |  25 ++
 datafusion-expr/src/expr_fn.rs           | 388 ++++++++++++++++++++++++++++-
 datafusion-expr/src/lib.rs               |   4 +-
 datafusion-expr/src/operator.rs          |  29 +++
 datafusion/src/logical_plan/expr.rs      | 414 +------------------------------
 datafusion/src/logical_plan/operators.rs |  29 ---
 6 files changed, 450 insertions(+), 439 deletions(-)

diff --git a/datafusion-expr/src/expr.rs b/datafusion-expr/src/expr.rs
index f26f1df..e998ebb 100644
--- a/datafusion-expr/src/expr.rs
+++ b/datafusion-expr/src/expr.rs
@@ -696,3 +696,28 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
         )),
     }
 }
+
+#[cfg(test)]
+mod test {
+    use crate::expr_fn::col;
+    use crate::lit;
+
+    #[test]
+    fn test_not() {
+        assert_eq!(lit(1).not(), !lit(1));
+    }
+
+    #[test]
+    fn test_partial_ord() {
+        // Test validates that partial ord is defined for Expr using hashes, not
+        // intended to exhaustively test all possibilities
+        let exp1 = col("a") + lit(1);
+        let exp2 = col("a") + lit(2);
+        let exp3 = !(col("a") + lit(2));
+
+        assert!(exp1 < exp2);
+        assert!(exp2 > exp1);
+        assert!(exp2 > exp3);
+        assert!(exp3 < exp2);
+    }
+}
diff --git a/datafusion-expr/src/expr_fn.rs b/datafusion-expr/src/expr_fn.rs
index 469a82d..2c3a1f4 100644
--- a/datafusion-expr/src/expr_fn.rs
+++ b/datafusion-expr/src/expr_fn.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::{Expr, Operator};
+use crate::{aggregate_function, built_in_function, lit, Expr, Operator};
 
 /// Create a column expression based on a qualified or unqualified column name
 pub fn col(ident: &str) -> Expr {
@@ -30,3 +30,389 @@ pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr {
         right: Box::new(r),
     }
 }
+
+/// Return a new expression that combines `left` and `right` with a logical AND
+pub fn and(left: Expr, right: Expr) -> Expr {
+    Expr::BinaryExpr {
+        left: Box::new(left),
+        op: Operator::And,
+        right: Box::new(right),
+    }
+}
+
+/// Return a new expression that combines `left` and `right` with a logical OR
+pub fn or(left: Expr, right: Expr) -> Expr {
+    Expr::BinaryExpr {
+        left: Box::new(left),
+        op: Operator::Or,
+        right: Box::new(right),
+    }
+}
+
+/// Create an expression to represent the min() aggregate function
+pub fn min(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Min,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the max() aggregate function
+pub fn max(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Max,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the sum() aggregate function
+pub fn sum(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Sum,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the avg() aggregate function
+pub fn avg(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Avg,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the count() aggregate function
+pub fn count(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Count,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the count(distinct) aggregate function
+pub fn count_distinct(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Count,
+        distinct: true,
+        args: vec![expr],
+    }
+}
+
+/// Create an in_list expression
+pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
+    Expr::InList {
+        expr: Box::new(expr),
+        list,
+        negated,
+    }
+}
+
+/// Concatenates the text representations of all the arguments. NULL arguments are ignored.
+pub fn concat(args: &[Expr]) -> Expr {
+    Expr::ScalarFunction {
+        fun: built_in_function::BuiltinScalarFunction::Concat,
+        args: args.to_vec(),
+    }
+}
+
+/// Concatenates all but the first argument, with separators.
+/// The first argument is used as the separator string, and should not be NULL.
+/// Other NULL arguments are ignored.
+pub fn concat_ws(sep: impl Into<String>, values: &[Expr]) -> Expr {
+    let mut args = vec![lit(sep.into())];
+    args.extend_from_slice(values);
+    Expr::ScalarFunction {
+        fun: built_in_function::BuiltinScalarFunction::ConcatWithSeparator,
+        args,
+    }
+}
+
+/// Returns a random value in the range 0.0 <= x < 1.0
+pub fn random() -> Expr {
+    Expr::ScalarFunction {
+        fun: built_in_function::BuiltinScalarFunction::Random,
+        args: vec![],
+    }
+}
+
+/// Returns the approximate number of distinct input values.
+/// This function provides an approximation of count(DISTINCT x).
+/// Zero is returned if all input values are null.
+/// This function should produce a standard error of 0.81%,
+/// which is the standard deviation of the (approximately normal)
+/// error distribution over all possible sets.
+/// It does not guarantee an upper bound on the error for any specific input set.
+pub fn approx_distinct(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::ApproxDistinct,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Calculate an approximation of the specified `percentile` for `expr`.
+pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::ApproxPercentileCont,
+        distinct: false,
+        args: vec![expr, percentile],
+    }
+}
+
+// TODO(kszucs): this seems buggy, unary_scalar_expr! is used for many
+// varying arity functions
+/// Create a convenience function representing a unary scalar function
+macro_rules! unary_scalar_expr {
+    ($ENUM:ident, $FUNC:ident) => {
+        #[doc = concat!("Unary scalar function definition for ", stringify!($FUNC) ) ]
+        pub fn $FUNC(e: Expr) -> Expr {
+            Expr::ScalarFunction {
+                fun: built_in_function::BuiltinScalarFunction::$ENUM,
+                args: vec![e],
+            }
+        }
+    };
+}
+
+macro_rules! scalar_expr {
+    ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
+        #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
+        pub fn $FUNC($($arg: Expr),*) -> Expr {
+            Expr::ScalarFunction {
+                fun: built_in_function::BuiltinScalarFunction::$ENUM,
+                args: vec![$($arg),*],
+            }
+        }
+    };
+}
+
+macro_rules! nary_scalar_expr {
+    ($ENUM:ident, $FUNC:ident) => {
+        #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
+        pub fn $FUNC(args: Vec<Expr>) -> Expr {
+            Expr::ScalarFunction {
+                fun: built_in_function::BuiltinScalarFunction::$ENUM,
+                args,
+            }
+        }
+    };
+}
+
+// generate methods for creating the supported unary/binary expressions
+
+// math functions
+unary_scalar_expr!(Sqrt, sqrt);
+unary_scalar_expr!(Sin, sin);
+unary_scalar_expr!(Cos, cos);
+unary_scalar_expr!(Tan, tan);
+unary_scalar_expr!(Asin, asin);
+unary_scalar_expr!(Acos, acos);
+unary_scalar_expr!(Atan, atan);
+unary_scalar_expr!(Floor, floor);
+unary_scalar_expr!(Ceil, ceil);
+unary_scalar_expr!(Now, now);
+unary_scalar_expr!(Round, round);
+unary_scalar_expr!(Trunc, trunc);
+unary_scalar_expr!(Abs, abs);
+unary_scalar_expr!(Signum, signum);
+unary_scalar_expr!(Exp, exp);
+unary_scalar_expr!(Log2, log2);
+unary_scalar_expr!(Log10, log10);
+unary_scalar_expr!(Ln, ln);
+
+// string functions
+scalar_expr!(Ascii, ascii, string);
+scalar_expr!(BitLength, bit_length, string);
+nary_scalar_expr!(Btrim, btrim);
+scalar_expr!(CharacterLength, character_length, string);
+scalar_expr!(CharacterLength, length, string);
+scalar_expr!(Chr, chr, string);
+scalar_expr!(Digest, digest, string, algorithm);
+scalar_expr!(InitCap, initcap, string);
+scalar_expr!(Left, left, string, count);
+scalar_expr!(Lower, lower, string);
+nary_scalar_expr!(Lpad, lpad);
+scalar_expr!(Ltrim, ltrim, string);
+scalar_expr!(MD5, md5, string);
+scalar_expr!(OctetLength, octet_length, string);
+nary_scalar_expr!(RegexpMatch, regexp_match);
+nary_scalar_expr!(RegexpReplace, regexp_replace);
+scalar_expr!(Replace, replace, string, from, to);
+scalar_expr!(Repeat, repeat, string, count);
+scalar_expr!(Reverse, reverse, string);
+scalar_expr!(Right, right, string, count);
+nary_scalar_expr!(Rpad, rpad);
+scalar_expr!(Rtrim, rtrim, string);
+scalar_expr!(SHA224, sha224, string);
+scalar_expr!(SHA256, sha256, string);
+scalar_expr!(SHA384, sha384, string);
+scalar_expr!(SHA512, sha512, string);
+scalar_expr!(SplitPart, split_part, expr, delimiter, index);
+scalar_expr!(StartsWith, starts_with, string, characters);
+scalar_expr!(Strpos, strpos, string, substring);
+scalar_expr!(Substr, substr, string, position);
+scalar_expr!(ToHex, to_hex, string);
+scalar_expr!(Translate, translate, string, from, to);
+scalar_expr!(Trim, trim, string);
+scalar_expr!(Upper, upper, string);
+
+// date functions
+scalar_expr!(DatePart, date_part, part, date);
+scalar_expr!(DateTrunc, date_trunc, part, date);
+
+/// Returns a fixed-size array containing each of the given arguments.
+pub fn array(args: Vec<Expr>) -> Expr {
+    Expr::ScalarFunction {
+        fun: built_in_function::BuiltinScalarFunction::Array,
+        args,
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn filter_is_null_and_is_not_null() {
+        let col_null = col("col1");
+        let col_not_null = col("col2");
+        assert_eq!(format!("{:?}", col_null.is_null()), "#col1 IS NULL");
+        assert_eq!(
+            format!("{:?}", col_not_null.is_not_null()),
+            "#col2 IS NOT NULL"
+        );
+    }
+
+    macro_rules! test_unary_scalar_expr {
+        ($ENUM:ident, $FUNC:ident) => {{
+            if let Expr::ScalarFunction { fun, args } = $FUNC(col("tableA.a")) {
+                let name = built_in_function::BuiltinScalarFunction::$ENUM;
+                assert_eq!(name, fun);
+                assert_eq!(1, args.len());
+            } else {
+                assert!(false, "unexpected");
+            }
+        }};
+    }
+
+    macro_rules! test_scalar_expr {
+        ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
+            let expected = vec![$(stringify!($arg)),*];
+            let result = $FUNC(
+                $(
+                    col(stringify!($arg.to_string()))
+                ),*
+            );
+            if let Expr::ScalarFunction { fun, args } = result {
+                let name = built_in_function::BuiltinScalarFunction::$ENUM;
+                assert_eq!(name, fun);
+                assert_eq!(expected.len(), args.len());
+            } else {
+                assert!(false, "unexpected: {:?}", result);
+            }
+        };
+    }
+
+    macro_rules! test_nary_scalar_expr {
+        ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
+            let expected = vec![$(stringify!($arg)),*];
+            let result = $FUNC(
+                vec![
+                    $(
+                        col(stringify!($arg.to_string()))
+                    ),*
+                ]
+            );
+            if let Expr::ScalarFunction { fun, args } = result {
+                let name = built_in_function::BuiltinScalarFunction::$ENUM;
+                assert_eq!(name, fun);
+                assert_eq!(expected.len(), args.len());
+            } else {
+                assert!(false, "unexpected: {:?}", result);
+            }
+        };
+    }
+
+    #[test]
+    fn scalar_function_definitions() {
+        test_unary_scalar_expr!(Sqrt, sqrt);
+        test_unary_scalar_expr!(Sin, sin);
+        test_unary_scalar_expr!(Cos, cos);
+        test_unary_scalar_expr!(Tan, tan);
+        test_unary_scalar_expr!(Asin, asin);
+        test_unary_scalar_expr!(Acos, acos);
+        test_unary_scalar_expr!(Atan, atan);
+        test_unary_scalar_expr!(Floor, floor);
+        test_unary_scalar_expr!(Ceil, ceil);
+        test_unary_scalar_expr!(Now, now);
+        test_unary_scalar_expr!(Round, round);
+        test_unary_scalar_expr!(Trunc, trunc);
+        test_unary_scalar_expr!(Abs, abs);
+        test_unary_scalar_expr!(Signum, signum);
+        test_unary_scalar_expr!(Exp, exp);
+        test_unary_scalar_expr!(Log2, log2);
+        test_unary_scalar_expr!(Log10, log10);
+        test_unary_scalar_expr!(Ln, ln);
+
+        test_scalar_expr!(Ascii, ascii, input);
+        test_scalar_expr!(BitLength, bit_length, string);
+        test_nary_scalar_expr!(Btrim, btrim, string);
+        test_nary_scalar_expr!(Btrim, btrim, string, characters);
+        test_scalar_expr!(CharacterLength, character_length, string);
+        test_scalar_expr!(CharacterLength, length, string);
+        test_scalar_expr!(Chr, chr, string);
+        test_scalar_expr!(Digest, digest, string, algorithm);
+        test_scalar_expr!(InitCap, initcap, string);
+        test_scalar_expr!(Left, left, string, count);
+        test_scalar_expr!(Lower, lower, string);
+        test_nary_scalar_expr!(Lpad, lpad, string, count);
+        test_nary_scalar_expr!(Lpad, lpad, string, count, characters);
+        test_scalar_expr!(Ltrim, ltrim, string);
+        test_scalar_expr!(MD5, md5, string);
+        test_scalar_expr!(OctetLength, octet_length, string);
+        test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern);
+        test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern, flags);
+        test_nary_scalar_expr!(
+            RegexpReplace,
+            regexp_replace,
+            string,
+            pattern,
+            replacement
+        );
+        test_nary_scalar_expr!(
+            RegexpReplace,
+            regexp_replace,
+            string,
+            pattern,
+            replacement,
+            flags
+        );
+        test_scalar_expr!(Replace, replace, string, from, to);
+        test_scalar_expr!(Repeat, repeat, string, count);
+        test_scalar_expr!(Reverse, reverse, string);
+        test_scalar_expr!(Right, right, string, count);
+        test_nary_scalar_expr!(Rpad, rpad, string, count);
+        test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
+        test_scalar_expr!(Rtrim, rtrim, string);
+        test_scalar_expr!(SHA224, sha224, string);
+        test_scalar_expr!(SHA256, sha256, string);
+        test_scalar_expr!(SHA384, sha384, string);
+        test_scalar_expr!(SHA512, sha512, string);
+        test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
+        test_scalar_expr!(StartsWith, starts_with, string, characters);
+        test_scalar_expr!(Strpos, strpos, string, substring);
+        test_scalar_expr!(Substr, substr, string, position);
+        test_scalar_expr!(ToHex, to_hex, string);
+        test_scalar_expr!(Translate, translate, string, from, to);
+        test_scalar_expr!(Trim, trim, string);
+        test_scalar_expr!(Upper, upper, string);
+
+        test_scalar_expr!(DatePart, date_part, part, date);
+        test_scalar_expr!(DateTrunc, date_trunc, part, date);
+    }
+}
diff --git a/datafusion-expr/src/lib.rs b/datafusion-expr/src/lib.rs
index 709fa63..1d0837f 100644
--- a/datafusion-expr/src/lib.rs
+++ b/datafusion-expr/src/lib.rs
@@ -37,8 +37,8 @@ pub use columnar_value::{ColumnarValue, NullColumnarValue};
 pub use expr::Expr;
 pub use expr_fn::col;
 pub use function::{
-    AccumulatorFunctionImplementation, ReturnTypeFunction, ScalarFunctionImplementation,
-    StateTypeFunction,
+  AccumulatorFunctionImplementation, ReturnTypeFunction, ScalarFunctionImplementation,
+  StateTypeFunction,
 };
 pub use literal::{lit, lit_timestamp_nano, Literal, TimestampLiteral};
 pub use operator::Operator;
diff --git a/datafusion-expr/src/operator.rs b/datafusion-expr/src/operator.rs
index a1cad76..585627f 100644
--- a/datafusion-expr/src/operator.rs
+++ b/datafusion-expr/src/operator.rs
@@ -138,3 +138,32 @@ impl ops::Rem for Expr {
         binary_expr(self, Operator::Modulo, rhs)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::lit;
+
+    #[test]
+    fn test_operators() {
+        assert_eq!(
+            format!("{:?}", lit(1u32) + lit(2u32)),
+            "UInt32(1) + UInt32(2)"
+        );
+        assert_eq!(
+            format!("{:?}", lit(1u32) - lit(2u32)),
+            "UInt32(1) - UInt32(2)"
+        );
+        assert_eq!(
+            format!("{:?}", lit(1u32) * lit(2u32)),
+            "UInt32(1) * UInt32(2)"
+        );
+        assert_eq!(
+            format!("{:?}", lit(1u32) / lit(2u32)),
+            "UInt32(1) / UInt32(2)"
+        );
+        assert_eq!(
+            format!("{:?}", lit(1u32) % lit(2u32)),
+            "UInt32(1) % UInt32(2)"
+        );
+    }
+}
diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs
index de05298..ba853ee 100644
--- a/datafusion/src/logical_plan/expr.rs
+++ b/datafusion/src/logical_plan/expr.rs
@@ -19,15 +19,17 @@
 //! such as `col = 5` or `SUM(col)`. See examples on the [`Expr`] struct.
 
 pub use super::Operator;
-use crate::error::{DataFusionError, Result};
+use crate::error::Result;
 use crate::logical_plan::ExprSchemable;
 use crate::logical_plan::{DFField, DFSchema};
 use crate::physical_plan::udaf::AggregateUDF;
-use crate::physical_plan::{aggregates, functions, udf::ScalarUDF};
+use crate::physical_plan::udf::ScalarUDF;
 use arrow::datatypes::DataType;
+use datafusion_common::DataFusionError;
 pub use datafusion_common::{Column, ExprSchema};
-pub use datafusion_expr::expr_fn::col;
+pub use datafusion_expr::expr_fn::*;
 use datafusion_expr::AccumulatorFunctionImplementation;
+use datafusion_expr::BuiltinScalarFunction;
 pub use datafusion_expr::Expr;
 use datafusion_expr::StateTypeFunction;
 pub use datafusion_expr::{lit, lit_timestamp_nano, Literal};
@@ -64,9 +66,7 @@ impl CaseBuilder {
     pub fn end(&self) -> Result<Expr> {
         self.build()
     }
-}
 
-impl CaseBuilder {
     fn build(&self) -> Result<Expr> {
         // collect all "then" expressions
         let mut then_expr = self.then_expr.clone();
@@ -127,15 +127,6 @@ pub fn when(when: Expr, then: Expr) -> CaseBuilder {
     }
 }
 
-/// return a new expression with a logical AND
-pub fn and(left: Expr, right: Expr) -> Expr {
-    Expr::BinaryExpr {
-        left: Box::new(left),
-        op: Operator::And,
-        right: Box::new(right),
-    }
-}
-
 /// Combines an array of filter expressions into a single filter expression
 /// consisting of the input filter expressions joined with logical AND.
 /// Returns None if the filters array is empty.
@@ -150,15 +141,6 @@ pub fn combine_filters(filters: &[Expr]) -> Option<Expr> {
     Some(combined_filter)
 }
 
-/// return a new expression with a logical OR
-pub fn or(left: Expr, right: Expr) -> Expr {
-    Expr::BinaryExpr {
-        left: Box::new(left),
-        op: Operator::Or,
-        right: Box::new(right),
-    }
-}
-
 /// Convert an expression into Column expression if it's already provided as input plan.
 ///
 /// For example, it rewrites:
@@ -200,230 +182,6 @@ pub fn unalias(expr: Expr) -> Expr {
     }
 }
 
-/// Create an expression to represent the min() aggregate function
-pub fn min(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Min,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the max() aggregate function
-pub fn max(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Max,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the sum() aggregate function
-pub fn sum(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Sum,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the avg() aggregate function
-pub fn avg(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Avg,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the count() aggregate function
-pub fn count(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Count,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the count(distinct) aggregate function
-pub fn count_distinct(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Count,
-        distinct: true,
-        args: vec![expr],
-    }
-}
-
-/// Create an in_list expression
-pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
-    Expr::InList {
-        expr: Box::new(expr),
-        list,
-        negated,
-    }
-}
-
-/// Concatenates the text representations of all the arguments. NULL arguments are ignored.
-pub fn concat(args: &[Expr]) -> Expr {
-    Expr::ScalarFunction {
-        fun: functions::BuiltinScalarFunction::Concat,
-        args: args.to_vec(),
-    }
-}
-
-/// Concatenates all but the first argument, with separators.
-/// The first argument is used as the separator string, and should not be NULL.
-/// Other NULL arguments are ignored.
-pub fn concat_ws(sep: impl Into<String>, values: &[Expr]) -> Expr {
-    let mut args = vec![lit(sep.into())];
-    args.extend_from_slice(values);
-    Expr::ScalarFunction {
-        fun: functions::BuiltinScalarFunction::ConcatWithSeparator,
-        args,
-    }
-}
-
-/// Returns a random value in the range 0.0 <= x < 1.0
-pub fn random() -> Expr {
-    Expr::ScalarFunction {
-        fun: functions::BuiltinScalarFunction::Random,
-        args: vec![],
-    }
-}
-
-/// Returns the approximate number of distinct input values.
-/// This function provides an approximation of count(DISTINCT x).
-/// Zero is returned if all input values are null.
-/// This function should produce a standard error of 0.81%,
-/// which is the standard deviation of the (approximately normal)
-/// error distribution over all possible sets.
-/// It does not guarantee an upper bound on the error for any specific input set.
-pub fn approx_distinct(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::ApproxDistinct,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Calculate an approximation of the specified `percentile` for `expr`.
-pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::ApproxPercentileCont,
-        distinct: false,
-        args: vec![expr, percentile],
-    }
-}
-
-// TODO(kszucs): this seems buggy, unary_scalar_expr! is used for many
-// varying arity functions
-/// Create an convenience function representing a unary scalar function
-macro_rules! unary_scalar_expr {
-    ($ENUM:ident, $FUNC:ident) => {
-        #[doc = concat!("Unary scalar function definition for ", stringify!($FUNC) ) ]
-        pub fn $FUNC(e: Expr) -> Expr {
-            Expr::ScalarFunction {
-                fun: functions::BuiltinScalarFunction::$ENUM,
-                args: vec![e],
-            }
-        }
-    };
-}
-
-macro_rules! scalar_expr {
-    ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
-        #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
-        pub fn $FUNC($($arg: Expr),*) -> Expr {
-            Expr::ScalarFunction {
-                fun: functions::BuiltinScalarFunction::$ENUM,
-                args: vec![$($arg),*],
-            }
-        }
-    };
-}
-
-macro_rules! nary_scalar_expr {
-    ($ENUM:ident, $FUNC:ident) => {
-        #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
-        pub fn $FUNC(args: Vec<Expr>) -> Expr {
-            Expr::ScalarFunction {
-                fun: functions::BuiltinScalarFunction::$ENUM,
-                args,
-            }
-        }
-    };
-}
-
-// generate methods for creating the supported unary/binary expressions
-
-// math functions
-unary_scalar_expr!(Sqrt, sqrt);
-unary_scalar_expr!(Sin, sin);
-unary_scalar_expr!(Cos, cos);
-unary_scalar_expr!(Tan, tan);
-unary_scalar_expr!(Asin, asin);
-unary_scalar_expr!(Acos, acos);
-unary_scalar_expr!(Atan, atan);
-unary_scalar_expr!(Floor, floor);
-unary_scalar_expr!(Ceil, ceil);
-unary_scalar_expr!(Now, now);
-unary_scalar_expr!(Round, round);
-unary_scalar_expr!(Trunc, trunc);
-unary_scalar_expr!(Abs, abs);
-unary_scalar_expr!(Signum, signum);
-unary_scalar_expr!(Exp, exp);
-unary_scalar_expr!(Log2, log2);
-unary_scalar_expr!(Log10, log10);
-unary_scalar_expr!(Ln, ln);
-
-// string functions
-scalar_expr!(Ascii, ascii, string);
-scalar_expr!(BitLength, bit_length, string);
-nary_scalar_expr!(Btrim, btrim);
-scalar_expr!(CharacterLength, character_length, string);
-scalar_expr!(CharacterLength, length, string);
-scalar_expr!(Chr, chr, string);
-scalar_expr!(Digest, digest, string, algorithm);
-scalar_expr!(InitCap, initcap, string);
-scalar_expr!(Left, left, string, count);
-scalar_expr!(Lower, lower, string);
-nary_scalar_expr!(Lpad, lpad);
-scalar_expr!(Ltrim, ltrim, string);
-scalar_expr!(MD5, md5, string);
-scalar_expr!(OctetLength, octet_length, string);
-nary_scalar_expr!(RegexpMatch, regexp_match);
-nary_scalar_expr!(RegexpReplace, regexp_replace);
-scalar_expr!(Replace, replace, string, from, to);
-scalar_expr!(Repeat, repeat, string, count);
-scalar_expr!(Reverse, reverse, string);
-scalar_expr!(Right, right, string, count);
-nary_scalar_expr!(Rpad, rpad);
-scalar_expr!(Rtrim, rtrim, string);
-scalar_expr!(SHA224, sha224, string);
-scalar_expr!(SHA256, sha256, string);
-scalar_expr!(SHA384, sha384, string);
-scalar_expr!(SHA512, sha512, string);
-scalar_expr!(SplitPart, split_part, expr, delimiter, index);
-scalar_expr!(StartsWith, starts_with, string, characters);
-scalar_expr!(Strpos, strpos, string, substring);
-scalar_expr!(Substr, substr, string, position);
-scalar_expr!(ToHex, to_hex, string);
-scalar_expr!(Translate, translate, string, from, to);
-scalar_expr!(Trim, trim, string);
-scalar_expr!(Upper, upper, string);
-
-// date functions
-scalar_expr!(DatePart, date_part, part, date);
-scalar_expr!(DateTrunc, date_trunc, part, date);
-
-/// returns an array of fixed size with each argument on it.
-pub fn array(args: Vec<Expr>) -> Expr {
-    Expr::ScalarFunction {
-        fun: functions::BuiltinScalarFunction::Array,
-        args,
-    }
-}
-
 /// Creates a new UDF with a specific signature and specific return type.
 /// This is a helper function to create a new UDF.
 /// The function `create_udf` returns a subset of all possible `ScalarFunction`:
@@ -483,7 +241,7 @@ pub fn exprlist_to_fields<'a>(
 /// let expr = call_fn("sin", vec![col("x")]).unwrap().lt(lit(0.2));
 /// ```
 pub fn call_fn(name: impl AsRef<str>, args: Vec<Expr>) -> Result<Expr> {
-    match name.as_ref().parse::<functions::BuiltinScalarFunction>() {
+    match name.as_ref().parse::<BuiltinScalarFunction>() {
         Ok(fun) => Ok(Expr::ScalarFunction { fun, args }),
         Err(e) => Err(e),
     }
@@ -512,75 +270,9 @@ mod tests {
     }
 
     #[test]
-    fn filter_is_null_and_is_not_null() {
-        let col_null = col("col1");
-        let col_not_null = col("col2");
-        assert_eq!(format!("{:?}", col_null.is_null()), "#col1 IS NULL");
-        assert_eq!(
-            format!("{:?}", col_not_null.is_not_null()),
-            "#col2 IS NOT NULL"
-        );
-    }
-
-    #[test]
-    fn test_not() {
-        assert_eq!(lit(1).not(), !lit(1));
-    }
-
-    macro_rules! test_unary_scalar_expr {
-        ($ENUM:ident, $FUNC:ident) => {{
-            if let Expr::ScalarFunction { fun, args } = $FUNC(col("tableA.a")) {
-                let name = functions::BuiltinScalarFunction::$ENUM;
-                assert_eq!(name, fun);
-                assert_eq!(1, args.len());
-            } else {
-                assert!(false, "unexpected");
-            }
-        }};
-    }
-
-    macro_rules! test_scalar_expr {
-        ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
-            let expected = vec![$(stringify!($arg)),*];
-            let result = $FUNC(
-                $(
-                    col(stringify!($arg.to_string()))
-                ),*
-            );
-            if let Expr::ScalarFunction { fun, args } = result {
-                let name = functions::BuiltinScalarFunction::$ENUM;
-                assert_eq!(name, fun);
-                assert_eq!(expected.len(), args.len());
-            } else {
-                assert!(false, "unexpected: {:?}", result);
-            }
-        };
-    }
-
-    macro_rules! test_nary_scalar_expr {
-        ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
-            let expected = vec![$(stringify!($arg)),*];
-            let result = $FUNC(
-                vec![
-                    $(
-                        col(stringify!($arg.to_string()))
-                    ),*
-                ]
-            );
-            if let Expr::ScalarFunction { fun, args } = result {
-                let name = functions::BuiltinScalarFunction::$ENUM;
-                assert_eq!(name, fun);
-                assert_eq!(expected.len(), args.len());
-            } else {
-                assert!(false, "unexpected: {:?}", result);
-            }
-        };
-    }
-
-    #[test]
     fn digest_function_definitions() {
         if let Expr::ScalarFunction { fun, args } = digest(col("tableA.a"), lit("md5")) {
-            let name = functions::BuiltinScalarFunction::Digest;
+            let name = BuiltinScalarFunction::Digest;
             assert_eq!(name, fun);
             assert_eq!(2, args.len());
         } else {
@@ -589,98 +281,6 @@ mod tests {
     }
 
     #[test]
-    fn scalar_function_definitions() {
-        test_unary_scalar_expr!(Sqrt, sqrt);
-        test_unary_scalar_expr!(Sin, sin);
-        test_unary_scalar_expr!(Cos, cos);
-        test_unary_scalar_expr!(Tan, tan);
-        test_unary_scalar_expr!(Asin, asin);
-        test_unary_scalar_expr!(Acos, acos);
-        test_unary_scalar_expr!(Atan, atan);
-        test_unary_scalar_expr!(Floor, floor);
-        test_unary_scalar_expr!(Ceil, ceil);
-        test_unary_scalar_expr!(Now, now);
-        test_unary_scalar_expr!(Round, round);
-        test_unary_scalar_expr!(Trunc, trunc);
-        test_unary_scalar_expr!(Abs, abs);
-        test_unary_scalar_expr!(Signum, signum);
-        test_unary_scalar_expr!(Exp, exp);
-        test_unary_scalar_expr!(Log2, log2);
-        test_unary_scalar_expr!(Log10, log10);
-        test_unary_scalar_expr!(Ln, ln);
-
-        test_scalar_expr!(Ascii, ascii, input);
-        test_scalar_expr!(BitLength, bit_length, string);
-        test_nary_scalar_expr!(Btrim, btrim, string);
-        test_nary_scalar_expr!(Btrim, btrim, string, characters);
-        test_scalar_expr!(CharacterLength, character_length, string);
-        test_scalar_expr!(CharacterLength, length, string);
-        test_scalar_expr!(Chr, chr, string);
-        test_scalar_expr!(Digest, digest, string, algorithm);
-        test_scalar_expr!(InitCap, initcap, string);
-        test_scalar_expr!(Left, left, string, count);
-        test_scalar_expr!(Lower, lower, string);
-        test_nary_scalar_expr!(Lpad, lpad, string, count);
-        test_nary_scalar_expr!(Lpad, lpad, string, count, characters);
-        test_scalar_expr!(Ltrim, ltrim, string);
-        test_scalar_expr!(MD5, md5, string);
-        test_scalar_expr!(OctetLength, octet_length, string);
-        test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern);
-        test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern, flags);
-        test_nary_scalar_expr!(
-            RegexpReplace,
-            regexp_replace,
-            string,
-            pattern,
-            replacement
-        );
-        test_nary_scalar_expr!(
-            RegexpReplace,
-            regexp_replace,
-            string,
-            pattern,
-            replacement,
-            flags
-        );
-        test_scalar_expr!(Replace, replace, string, from, to);
-        test_scalar_expr!(Repeat, repeat, string, count);
-        test_scalar_expr!(Reverse, reverse, string);
-        test_scalar_expr!(Right, right, string, count);
-        test_nary_scalar_expr!(Rpad, rpad, string, count);
-        test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
-        test_scalar_expr!(Rtrim, rtrim, string);
-        test_scalar_expr!(SHA224, sha224, string);
-        test_scalar_expr!(SHA256, sha256, string);
-        test_scalar_expr!(SHA384, sha384, string);
-        test_scalar_expr!(SHA512, sha512, string);
-        test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
-        test_scalar_expr!(StartsWith, starts_with, string, characters);
-        test_scalar_expr!(Strpos, strpos, string, substring);
-        test_scalar_expr!(Substr, substr, string, position);
-        test_scalar_expr!(ToHex, to_hex, string);
-        test_scalar_expr!(Translate, translate, string, from, to);
-        test_scalar_expr!(Trim, trim, string);
-        test_scalar_expr!(Upper, upper, string);
-
-        test_scalar_expr!(DatePart, date_part, part, date);
-        test_scalar_expr!(DateTrunc, date_trunc, part, date);
-    }
-
-    #[test]
-    fn test_partial_ord() {
-        // Test validates that partial ord is defined for Expr using hashes, not
-        // intended to exhaustively test all possibilities
-        let exp1 = col("a") + lit(1);
-        let exp2 = col("a") + lit(2);
-        let exp3 = !(col("a") + lit(2));
-
-        assert!(exp1 < exp2);
-        assert!(exp2 > exp1);
-        assert!(exp2 > exp3);
-        assert!(exp3 < exp2);
-    }
-
-    #[test]
     fn combine_zero_filters() {
         let result = combine_filters(&[]);
         assert_eq!(result, None);
diff --git a/datafusion/src/logical_plan/operators.rs b/datafusion/src/logical_plan/operators.rs
index 2f12928..132f8a8 100644
--- a/datafusion/src/logical_plan/operators.rs
+++ b/datafusion/src/logical_plan/operators.rs
@@ -16,32 +16,3 @@
 // under the License.
 
 pub use datafusion_expr::Operator;
-
-#[cfg(test)]
-mod tests {
-    use crate::prelude::lit;
-
-    #[test]
-    fn test_operators() {
-        assert_eq!(
-            format!("{:?}", lit(1u32) + lit(2u32)),
-            "UInt32(1) + UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) - lit(2u32)),
-            "UInt32(1) - UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) * lit(2u32)),
-            "UInt32(1) * UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) / lit(2u32)),
-            "UInt32(1) / UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) % lit(2u32)),
-            "UInt32(1) % UInt32(2)"
-        );
-    }
-}