You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/09 13:16:42 UTC

[arrow-datafusion] branch physical-plan updated (e5d417e -> 6f86fb9)

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a change to branch physical-plan
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.


 discard e5d417e  remove reference to logical plan in physical plan
    omit 22a282d  add module level comments
    omit 8375cf6  move expr functions to datafusion-expr expr_fn
     add 21db2c6  Add logging to datafusion cli (#1789)
     add 59ecf2b  tweak doc publishing instructions (#1790)
     add 6e02d2d  Add approx-median operator (#1729)
     new 7396899  move expr functions to datafusion-expr expr_fn
     new b0ea0b9  add module level comments
     new 6f86fb9  remove reference to logical plan in physical plan

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (e5d417e)
            \
             N -- N -- N   refs/heads/physical-plan (6f86fb9)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 ballista/rust/core/proto/ballista.proto            |   1 +
 .../rust/core/src/serde/logical_plan/to_proto.rs   |   4 +
 ballista/rust/core/src/serde/mod.rs                |   1 +
 datafusion-cli/Cargo.toml                          |   1 +
 datafusion-cli/src/main.rs                         |   1 +
 datafusion-expr/src/aggregate_function.rs          |   3 +
 datafusion/src/execution/context.rs                |   8 +-
 datafusion/src/optimizer/mod.rs                    |   1 +
 datafusion/src/optimizer/to_approx_perc.rs         | 161 +++++++++++++++++++++
 datafusion/src/physical_plan/aggregates.rs         |  77 +++++++++-
 .../physical_plan/coercion_rule/aggregate_rule.rs  |   9 ++
 .../src/physical_plan/expressions/approx_median.rs |  75 ++++++++++
 datafusion/src/physical_plan/expressions/mod.rs    |   2 +
 datafusion/tests/sql/aggregates.rs                 |  33 +++++
 docs/README.md                                     |  13 +-
 15 files changed, 382 insertions(+), 8 deletions(-)
 create mode 100644 datafusion/src/optimizer/to_approx_perc.rs
 create mode 100644 datafusion/src/physical_plan/expressions/approx_median.rs

[arrow-datafusion] 02/03: add module level comments

Posted by ji...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch physical-plan
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit b0ea0b9a7b244b4e73afa72acb6b9ba67ffc9736
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Wed Feb 9 13:14:43 2022 +0800

    add module level comments
---
 datafusion-expr/src/accumulator.rs        |  2 ++
 datafusion-expr/src/aggregate_function.rs |  2 ++
 datafusion-expr/src/built_in_function.rs  |  2 +-
 datafusion-expr/src/columnar_value.rs     |  2 ++
 datafusion-expr/src/expr.rs               |  2 ++
 datafusion-expr/src/expr_fn.rs            | 10 ++++++----
 datafusion-expr/src/function.rs           |  2 ++
 datafusion-expr/src/lib.rs                |  4 ++--
 datafusion-expr/src/literal.rs            |  2 ++
 datafusion-expr/src/operator.rs           |  2 ++
 datafusion-expr/src/signature.rs          | 16 ++++++++++++----
 datafusion-expr/src/udaf.rs               |  2 +-
 datafusion-expr/src/udf.rs                |  2 +-
 datafusion-expr/src/window_frame.rs       |  2 +-
 datafusion-expr/src/window_function.rs    |  3 +++
 15 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/datafusion-expr/src/accumulator.rs b/datafusion-expr/src/accumulator.rs
index 599bd36..d597649 100644
--- a/datafusion-expr/src/accumulator.rs
+++ b/datafusion-expr/src/accumulator.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Accumulator module contains the trait definition for aggregation function's accumulators.
+
 use arrow::array::ArrayRef;
 use datafusion_common::{Result, ScalarValue};
 use std::fmt::Debug;
diff --git a/datafusion-expr/src/aggregate_function.rs b/datafusion-expr/src/aggregate_function.rs
index 4e03445..87b666f 100644
--- a/datafusion-expr/src/aggregate_function.rs
+++ b/datafusion-expr/src/aggregate_function.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Aggregate function module contains all built-in aggregate function definitions
+
 use datafusion_common::{DataFusionError, Result};
 use std::{fmt, str::FromStr};
 
diff --git a/datafusion-expr/src/built_in_function.rs b/datafusion-expr/src/built_in_function.rs
index 0d5ee97..8762682 100644
--- a/datafusion-expr/src/built_in_function.rs
+++ b/datafusion-expr/src/built_in_function.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Built-in functions
+//! Built-in functions module contains all the built-in function definitions.
 
 use crate::Volatility;
 use datafusion_common::{DataFusionError, Result};
diff --git a/datafusion-expr/src/columnar_value.rs b/datafusion-expr/src/columnar_value.rs
index 5e6959d..4867c0e 100644
--- a/datafusion-expr/src/columnar_value.rs
+++ b/datafusion-expr/src/columnar_value.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Columnar value module contains a set of types that represent a columnar value.
+
 use arrow::array::ArrayRef;
 use arrow::array::NullArray;
 use arrow::datatypes::DataType;
diff --git a/datafusion-expr/src/expr.rs b/datafusion-expr/src/expr.rs
index e998ebb..d3cbf70 100644
--- a/datafusion-expr/src/expr.rs
+++ b/datafusion-expr/src/expr.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Expr module contains core type definition for `Expr`.
+
 use crate::aggregate_function;
 use crate::built_in_function;
 use crate::expr_fn::binary_expr;
diff --git a/datafusion-expr/src/expr_fn.rs b/datafusion-expr/src/expr_fn.rs
index 2c3a1f4..d39269c 100644
--- a/datafusion-expr/src/expr_fn.rs
+++ b/datafusion-expr/src/expr_fn.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Expr fn module contains the functional definitions for expressions.
+
 use crate::{aggregate_function, built_in_function, lit, Expr, Operator};
 
 /// Create a column expression based on a qualified or unqualified column name
@@ -22,7 +24,7 @@ pub fn col(ident: &str) -> Expr {
     Expr::Column(ident.into())
 }
 
-/// return a new expression l <op> r
+/// Return a new expression l <op> r
 pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr {
     Expr::BinaryExpr {
         left: Box::new(l),
@@ -31,7 +33,7 @@ pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr {
     }
 }
 
-/// return a new expression with a logical AND
+/// Return a new expression with a logical AND
 pub fn and(left: Expr, right: Expr) -> Expr {
     Expr::BinaryExpr {
         left: Box::new(left),
@@ -40,7 +42,7 @@ pub fn and(left: Expr, right: Expr) -> Expr {
     }
 }
 
-/// return a new expression with a logical OR
+/// Return a new expression with a logical OR
 pub fn or(left: Expr, right: Expr) -> Expr {
     Expr::BinaryExpr {
         left: Box::new(left),
@@ -265,7 +267,7 @@ scalar_expr!(Upper, upper, string);
 scalar_expr!(DatePart, date_part, part, date);
 scalar_expr!(DateTrunc, date_trunc, part, date);
 
-/// returns an array of fixed size with each argument on it.
+/// Returns an array of fixed size with each argument on it.
 pub fn array(args: Vec<Expr>) -> Expr {
     Expr::ScalarFunction {
         fun: built_in_function::BuiltinScalarFunction::Array,
diff --git a/datafusion-expr/src/function.rs b/datafusion-expr/src/function.rs
index 2bacd6a..3689ff7 100644
--- a/datafusion-expr/src/function.rs
+++ b/datafusion-expr/src/function.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Function module contains typing and signature for built-in and user defined functions.
+
 use crate::Accumulator;
 use crate::ColumnarValue;
 use arrow::datatypes::DataType;
diff --git a/datafusion-expr/src/lib.rs b/datafusion-expr/src/lib.rs
index 1d0837f..709fa63 100644
--- a/datafusion-expr/src/lib.rs
+++ b/datafusion-expr/src/lib.rs
@@ -37,8 +37,8 @@ pub use columnar_value::{ColumnarValue, NullColumnarValue};
 pub use expr::Expr;
 pub use expr_fn::col;
 pub use function::{
-  AccumulatorFunctionImplementation, ReturnTypeFunction, ScalarFunctionImplementation,
-  StateTypeFunction,
+    AccumulatorFunctionImplementation, ReturnTypeFunction, ScalarFunctionImplementation,
+    StateTypeFunction,
 };
 pub use literal::{lit, lit_timestamp_nano, Literal, TimestampLiteral};
 pub use operator::Operator;
diff --git a/datafusion-expr/src/literal.rs b/datafusion-expr/src/literal.rs
index 02c75af..08646b8 100644
--- a/datafusion-expr/src/literal.rs
+++ b/datafusion-expr/src/literal.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Literal module contains foundational types that are used to represent literals in DataFusion.
+
 use crate::Expr;
 use datafusion_common::ScalarValue;
 
diff --git a/datafusion-expr/src/operator.rs b/datafusion-expr/src/operator.rs
index 585627f..0d3f177 100644
--- a/datafusion-expr/src/operator.rs
+++ b/datafusion-expr/src/operator.rs
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Operator module contains foundational types that are used to represent operators in DataFusion.
+
 use crate::expr_fn::binary_expr;
 use crate::Expr;
 use std::fmt;
diff --git a/datafusion-expr/src/signature.rs b/datafusion-expr/src/signature.rs
index 5c27f42..b347448 100644
--- a/datafusion-expr/src/signature.rs
+++ b/datafusion-expr/src/signature.rs
@@ -15,16 +15,24 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Signature module contains foundational types that are used to represent signatures, types,
+//! and return types of functions in DataFusion.
+
 use arrow::datatypes::DataType;
 
 ///A function's volatility, which defines the functions eligibility for certain optimizations
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
 pub enum Volatility {
-    /// Immutable - An immutable function will always return the same output when given the same input. An example of this is [BuiltinScalarFunction::Cos].
+    /// Immutable - An immutable function will always return the same output when given the same
+    /// input. An example of this is [BuiltinScalarFunction::Cos].
     Immutable,
-    /// Stable - A stable function may return different values given the same input accross different queries but must return the same value for a given input within a query. An example of this is [BuiltinScalarFunction::Now].
+    /// Stable - A stable function may return different values given the same input across different
+    /// queries but must return the same value for a given input within a query. An example of
+    /// this is [BuiltinScalarFunction::Now].
     Stable,
-    /// Volatile - A volatile function may change the return value from evaluation to evaluation. Mutiple invocations of a volatile function may return different results when used in the same query. An example of this is [BuiltinScalarFunction::Random].
+    /// Volatile - A volatile function may change the return value from evaluation to evaluation.
+    /// Multiple invocations of a volatile function may return different results when used in the
+    /// same query. An example of this is [BuiltinScalarFunction::Random].
     Volatile,
 }
 
@@ -92,7 +100,7 @@ impl Signature {
             volatility,
         }
     }
-    /// exact - Creates a signture which must match the types in exact_types in order.
+    /// exact - Creates a signature which must match the types in exact_types in order.
     pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
         Signature {
             type_signature: TypeSignature::Exact(exact_types),
diff --git a/datafusion-expr/src/udaf.rs b/datafusion-expr/src/udaf.rs
index a39d58b..8c15da4 100644
--- a/datafusion-expr/src/udaf.rs
+++ b/datafusion-expr/src/udaf.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! This module contains functions and structs supporting user-defined aggregate functions.
+//! Udaf module contains functions and structs supporting user-defined aggregate functions.
 
 use crate::Expr;
 use crate::{
diff --git a/datafusion-expr/src/udf.rs b/datafusion-expr/src/udf.rs
index 79a17a4..4d60b29 100644
--- a/datafusion-expr/src/udf.rs
+++ b/datafusion-expr/src/udf.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! UDF support
+//! Udf module contains foundational types that are used to represent UDFs in DataFusion.
 
 use crate::{Expr, ReturnTypeFunction, ScalarFunctionImplementation, Signature};
 use std::fmt;
diff --git a/datafusion-expr/src/window_frame.rs b/datafusion-expr/src/window_frame.rs
index ba65a50..a0d6ed0 100644
--- a/datafusion-expr/src/window_frame.rs
+++ b/datafusion-expr/src/window_frame.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Window frame
+//! Window frame module
 //!
 //! The frame-spec determines which output rows are read by an aggregate window function. The frame-spec consists of four parts:
 //! - A frame type - either ROWS, RANGE or GROUPS,
diff --git a/datafusion-expr/src/window_function.rs b/datafusion-expr/src/window_function.rs
index 59523d6..bccf653 100644
--- a/datafusion-expr/src/window_function.rs
+++ b/datafusion-expr/src/window_function.rs
@@ -15,6 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Window function module contains foundational types that are used to represent window functions
+//! in DataFusion.
+
 use crate::aggregate_function::AggregateFunction;
 use datafusion_common::{DataFusionError, Result};
 use std::{fmt, str::FromStr};

[arrow-datafusion] 01/03: move expr functions to datafusion-expr expr_fn

Posted by ji...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch physical-plan
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 73968992f0bd0d40299b9b243cca3d807ac97cb6
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Wed Feb 9 13:08:39 2022 +0800

    move expr functions to datafusion-expr expr_fn
---
 datafusion-expr/src/expr.rs              |  25 ++
 datafusion-expr/src/expr_fn.rs           | 388 ++++++++++++++++++++++++++++-
 datafusion-expr/src/lib.rs               |   4 +-
 datafusion-expr/src/operator.rs          |  29 +++
 datafusion/src/logical_plan/expr.rs      | 414 +------------------------------
 datafusion/src/logical_plan/operators.rs |  29 ---
 6 files changed, 450 insertions(+), 439 deletions(-)

diff --git a/datafusion-expr/src/expr.rs b/datafusion-expr/src/expr.rs
index f26f1df..e998ebb 100644
--- a/datafusion-expr/src/expr.rs
+++ b/datafusion-expr/src/expr.rs
@@ -696,3 +696,28 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
         )),
     }
 }
+
+#[cfg(test)]
+mod test {
+    use crate::expr_fn::col;
+    use crate::lit;
+
+    #[test]
+    fn test_not() {
+        assert_eq!(lit(1).not(), !lit(1));
+    }
+
+    #[test]
+    fn test_partial_ord() {
+        // Test validates that partial ord is defined for Expr using hashes, not
+        // intended to exhaustively test all possibilities
+        let exp1 = col("a") + lit(1);
+        let exp2 = col("a") + lit(2);
+        let exp3 = !(col("a") + lit(2));
+
+        assert!(exp1 < exp2);
+        assert!(exp2 > exp1);
+        assert!(exp2 > exp3);
+        assert!(exp3 < exp2);
+    }
+}
diff --git a/datafusion-expr/src/expr_fn.rs b/datafusion-expr/src/expr_fn.rs
index 469a82d..2c3a1f4 100644
--- a/datafusion-expr/src/expr_fn.rs
+++ b/datafusion-expr/src/expr_fn.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::{Expr, Operator};
+use crate::{aggregate_function, built_in_function, lit, Expr, Operator};
 
 /// Create a column expression based on a qualified or unqualified column name
 pub fn col(ident: &str) -> Expr {
@@ -30,3 +30,389 @@ pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr {
         right: Box::new(r),
     }
 }
+
+/// return a new expression with a logical AND
+pub fn and(left: Expr, right: Expr) -> Expr {
+    Expr::BinaryExpr {
+        left: Box::new(left),
+        op: Operator::And,
+        right: Box::new(right),
+    }
+}
+
+/// return a new expression with a logical OR
+pub fn or(left: Expr, right: Expr) -> Expr {
+    Expr::BinaryExpr {
+        left: Box::new(left),
+        op: Operator::Or,
+        right: Box::new(right),
+    }
+}
+
+/// Create an expression to represent the min() aggregate function
+pub fn min(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Min,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the max() aggregate function
+pub fn max(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Max,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the sum() aggregate function
+pub fn sum(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Sum,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the avg() aggregate function
+pub fn avg(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Avg,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the count() aggregate function
+pub fn count(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Count,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Create an expression to represent the count(distinct) aggregate function
+pub fn count_distinct(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::Count,
+        distinct: true,
+        args: vec![expr],
+    }
+}
+
+/// Create an in_list expression
+pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
+    Expr::InList {
+        expr: Box::new(expr),
+        list,
+        negated,
+    }
+}
+
+/// Concatenates the text representations of all the arguments. NULL arguments are ignored.
+pub fn concat(args: &[Expr]) -> Expr {
+    Expr::ScalarFunction {
+        fun: built_in_function::BuiltinScalarFunction::Concat,
+        args: args.to_vec(),
+    }
+}
+
+/// Concatenates all but the first argument, with separators.
+/// The first argument is used as the separator string, and should not be NULL.
+/// Other NULL arguments are ignored.
+pub fn concat_ws(sep: impl Into<String>, values: &[Expr]) -> Expr {
+    let mut args = vec![lit(sep.into())];
+    args.extend_from_slice(values);
+    Expr::ScalarFunction {
+        fun: built_in_function::BuiltinScalarFunction::ConcatWithSeparator,
+        args,
+    }
+}
+
+/// Returns a random value in the range 0.0 <= x < 1.0
+pub fn random() -> Expr {
+    Expr::ScalarFunction {
+        fun: built_in_function::BuiltinScalarFunction::Random,
+        args: vec![],
+    }
+}
+
+/// Returns the approximate number of distinct input values.
+/// This function provides an approximation of count(DISTINCT x).
+/// Zero is returned if all input values are null.
+/// This function should produce a standard error of 0.81%,
+/// which is the standard deviation of the (approximately normal)
+/// error distribution over all possible sets.
+/// It does not guarantee an upper bound on the error for any specific input set.
+pub fn approx_distinct(expr: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::ApproxDistinct,
+        distinct: false,
+        args: vec![expr],
+    }
+}
+
+/// Calculate an approximation of the specified `percentile` for `expr`.
+pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr {
+    Expr::AggregateFunction {
+        fun: aggregate_function::AggregateFunction::ApproxPercentileCont,
+        distinct: false,
+        args: vec![expr, percentile],
+    }
+}
+
+// TODO(kszucs): this seems buggy, unary_scalar_expr! is used for many
+// varying arity functions
+/// Create a convenience function representing a unary scalar function
+macro_rules! unary_scalar_expr {
+    ($ENUM:ident, $FUNC:ident) => {
+        #[doc = concat!("Unary scalar function definition for ", stringify!($FUNC) ) ]
+        pub fn $FUNC(e: Expr) -> Expr {
+            Expr::ScalarFunction {
+                fun: built_in_function::BuiltinScalarFunction::$ENUM,
+                args: vec![e],
+            }
+        }
+    };
+}
+
+macro_rules! scalar_expr {
+    ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
+        #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
+        pub fn $FUNC($($arg: Expr),*) -> Expr {
+            Expr::ScalarFunction {
+                fun: built_in_function::BuiltinScalarFunction::$ENUM,
+                args: vec![$($arg),*],
+            }
+        }
+    };
+}
+
+macro_rules! nary_scalar_expr {
+    ($ENUM:ident, $FUNC:ident) => {
+        #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
+        pub fn $FUNC(args: Vec<Expr>) -> Expr {
+            Expr::ScalarFunction {
+                fun: built_in_function::BuiltinScalarFunction::$ENUM,
+                args,
+            }
+        }
+    };
+}
+
+// generate methods for creating the supported unary/binary expressions
+
+// math functions
+unary_scalar_expr!(Sqrt, sqrt);
+unary_scalar_expr!(Sin, sin);
+unary_scalar_expr!(Cos, cos);
+unary_scalar_expr!(Tan, tan);
+unary_scalar_expr!(Asin, asin);
+unary_scalar_expr!(Acos, acos);
+unary_scalar_expr!(Atan, atan);
+unary_scalar_expr!(Floor, floor);
+unary_scalar_expr!(Ceil, ceil);
+unary_scalar_expr!(Now, now);
+unary_scalar_expr!(Round, round);
+unary_scalar_expr!(Trunc, trunc);
+unary_scalar_expr!(Abs, abs);
+unary_scalar_expr!(Signum, signum);
+unary_scalar_expr!(Exp, exp);
+unary_scalar_expr!(Log2, log2);
+unary_scalar_expr!(Log10, log10);
+unary_scalar_expr!(Ln, ln);
+
+// string functions
+scalar_expr!(Ascii, ascii, string);
+scalar_expr!(BitLength, bit_length, string);
+nary_scalar_expr!(Btrim, btrim);
+scalar_expr!(CharacterLength, character_length, string);
+scalar_expr!(CharacterLength, length, string);
+scalar_expr!(Chr, chr, string);
+scalar_expr!(Digest, digest, string, algorithm);
+scalar_expr!(InitCap, initcap, string);
+scalar_expr!(Left, left, string, count);
+scalar_expr!(Lower, lower, string);
+nary_scalar_expr!(Lpad, lpad);
+scalar_expr!(Ltrim, ltrim, string);
+scalar_expr!(MD5, md5, string);
+scalar_expr!(OctetLength, octet_length, string);
+nary_scalar_expr!(RegexpMatch, regexp_match);
+nary_scalar_expr!(RegexpReplace, regexp_replace);
+scalar_expr!(Replace, replace, string, from, to);
+scalar_expr!(Repeat, repeat, string, count);
+scalar_expr!(Reverse, reverse, string);
+scalar_expr!(Right, right, string, count);
+nary_scalar_expr!(Rpad, rpad);
+scalar_expr!(Rtrim, rtrim, string);
+scalar_expr!(SHA224, sha224, string);
+scalar_expr!(SHA256, sha256, string);
+scalar_expr!(SHA384, sha384, string);
+scalar_expr!(SHA512, sha512, string);
+scalar_expr!(SplitPart, split_part, expr, delimiter, index);
+scalar_expr!(StartsWith, starts_with, string, characters);
+scalar_expr!(Strpos, strpos, string, substring);
+scalar_expr!(Substr, substr, string, position);
+scalar_expr!(ToHex, to_hex, string);
+scalar_expr!(Translate, translate, string, from, to);
+scalar_expr!(Trim, trim, string);
+scalar_expr!(Upper, upper, string);
+
+// date functions
+scalar_expr!(DatePart, date_part, part, date);
+scalar_expr!(DateTrunc, date_trunc, part, date);
+
+/// returns an array of fixed size with each argument on it.
+pub fn array(args: Vec<Expr>) -> Expr {
+    Expr::ScalarFunction {
+        fun: built_in_function::BuiltinScalarFunction::Array,
+        args,
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn filter_is_null_and_is_not_null() {
+        let col_null = col("col1");
+        let col_not_null = col("col2");
+        assert_eq!(format!("{:?}", col_null.is_null()), "#col1 IS NULL");
+        assert_eq!(
+            format!("{:?}", col_not_null.is_not_null()),
+            "#col2 IS NOT NULL"
+        );
+    }
+
+    macro_rules! test_unary_scalar_expr {
+        ($ENUM:ident, $FUNC:ident) => {{
+            if let Expr::ScalarFunction { fun, args } = $FUNC(col("tableA.a")) {
+                let name = built_in_function::BuiltinScalarFunction::$ENUM;
+                assert_eq!(name, fun);
+                assert_eq!(1, args.len());
+            } else {
+                assert!(false, "unexpected");
+            }
+        }};
+    }
+
+    macro_rules! test_scalar_expr {
+        ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
+            let expected = vec![$(stringify!($arg)),*];
+            let result = $FUNC(
+                $(
+                    col(stringify!($arg.to_string()))
+                ),*
+            );
+            if let Expr::ScalarFunction { fun, args } = result {
+                let name = built_in_function::BuiltinScalarFunction::$ENUM;
+                assert_eq!(name, fun);
+                assert_eq!(expected.len(), args.len());
+            } else {
+                assert!(false, "unexpected: {:?}", result);
+            }
+        };
+    }
+
+    macro_rules! test_nary_scalar_expr {
+        ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
+            let expected = vec![$(stringify!($arg)),*];
+            let result = $FUNC(
+                vec![
+                    $(
+                        col(stringify!($arg.to_string()))
+                    ),*
+                ]
+            );
+            if let Expr::ScalarFunction { fun, args } = result {
+                let name = built_in_function::BuiltinScalarFunction::$ENUM;
+                assert_eq!(name, fun);
+                assert_eq!(expected.len(), args.len());
+            } else {
+                assert!(false, "unexpected: {:?}", result);
+            }
+        };
+    }
+
+    #[test]
+    fn scalar_function_definitions() {
+        test_unary_scalar_expr!(Sqrt, sqrt);
+        test_unary_scalar_expr!(Sin, sin);
+        test_unary_scalar_expr!(Cos, cos);
+        test_unary_scalar_expr!(Tan, tan);
+        test_unary_scalar_expr!(Asin, asin);
+        test_unary_scalar_expr!(Acos, acos);
+        test_unary_scalar_expr!(Atan, atan);
+        test_unary_scalar_expr!(Floor, floor);
+        test_unary_scalar_expr!(Ceil, ceil);
+        test_unary_scalar_expr!(Now, now);
+        test_unary_scalar_expr!(Round, round);
+        test_unary_scalar_expr!(Trunc, trunc);
+        test_unary_scalar_expr!(Abs, abs);
+        test_unary_scalar_expr!(Signum, signum);
+        test_unary_scalar_expr!(Exp, exp);
+        test_unary_scalar_expr!(Log2, log2);
+        test_unary_scalar_expr!(Log10, log10);
+        test_unary_scalar_expr!(Ln, ln);
+
+        test_scalar_expr!(Ascii, ascii, input);
+        test_scalar_expr!(BitLength, bit_length, string);
+        test_nary_scalar_expr!(Btrim, btrim, string);
+        test_nary_scalar_expr!(Btrim, btrim, string, characters);
+        test_scalar_expr!(CharacterLength, character_length, string);
+        test_scalar_expr!(CharacterLength, length, string);
+        test_scalar_expr!(Chr, chr, string);
+        test_scalar_expr!(Digest, digest, string, algorithm);
+        test_scalar_expr!(InitCap, initcap, string);
+        test_scalar_expr!(Left, left, string, count);
+        test_scalar_expr!(Lower, lower, string);
+        test_nary_scalar_expr!(Lpad, lpad, string, count);
+        test_nary_scalar_expr!(Lpad, lpad, string, count, characters);
+        test_scalar_expr!(Ltrim, ltrim, string);
+        test_scalar_expr!(MD5, md5, string);
+        test_scalar_expr!(OctetLength, octet_length, string);
+        test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern);
+        test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern, flags);
+        test_nary_scalar_expr!(
+            RegexpReplace,
+            regexp_replace,
+            string,
+            pattern,
+            replacement
+        );
+        test_nary_scalar_expr!(
+            RegexpReplace,
+            regexp_replace,
+            string,
+            pattern,
+            replacement,
+            flags
+        );
+        test_scalar_expr!(Replace, replace, string, from, to);
+        test_scalar_expr!(Repeat, repeat, string, count);
+        test_scalar_expr!(Reverse, reverse, string);
+        test_scalar_expr!(Right, right, string, count);
+        test_nary_scalar_expr!(Rpad, rpad, string, count);
+        test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
+        test_scalar_expr!(Rtrim, rtrim, string);
+        test_scalar_expr!(SHA224, sha224, string);
+        test_scalar_expr!(SHA256, sha256, string);
+        test_scalar_expr!(SHA384, sha384, string);
+        test_scalar_expr!(SHA512, sha512, string);
+        test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
+        test_scalar_expr!(StartsWith, starts_with, string, characters);
+        test_scalar_expr!(Strpos, strpos, string, substring);
+        test_scalar_expr!(Substr, substr, string, position);
+        test_scalar_expr!(ToHex, to_hex, string);
+        test_scalar_expr!(Translate, translate, string, from, to);
+        test_scalar_expr!(Trim, trim, string);
+        test_scalar_expr!(Upper, upper, string);
+
+        test_scalar_expr!(DatePart, date_part, part, date);
+        test_scalar_expr!(DateTrunc, date_trunc, part, date);
+    }
+}
diff --git a/datafusion-expr/src/lib.rs b/datafusion-expr/src/lib.rs
index 709fa63..1d0837f 100644
--- a/datafusion-expr/src/lib.rs
+++ b/datafusion-expr/src/lib.rs
@@ -37,8 +37,8 @@ pub use columnar_value::{ColumnarValue, NullColumnarValue};
 pub use expr::Expr;
 pub use expr_fn::col;
 pub use function::{
-    AccumulatorFunctionImplementation, ReturnTypeFunction, ScalarFunctionImplementation,
-    StateTypeFunction,
+  AccumulatorFunctionImplementation, ReturnTypeFunction, ScalarFunctionImplementation,
+  StateTypeFunction,
 };
 pub use literal::{lit, lit_timestamp_nano, Literal, TimestampLiteral};
 pub use operator::Operator;
diff --git a/datafusion-expr/src/operator.rs b/datafusion-expr/src/operator.rs
index a1cad76..585627f 100644
--- a/datafusion-expr/src/operator.rs
+++ b/datafusion-expr/src/operator.rs
@@ -138,3 +138,32 @@ impl ops::Rem for Expr {
         binary_expr(self, Operator::Modulo, rhs)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::lit;
+
+    #[test]
+    fn test_operators() {
+        assert_eq!(
+            format!("{:?}", lit(1u32) + lit(2u32)),
+            "UInt32(1) + UInt32(2)"
+        );
+        assert_eq!(
+            format!("{:?}", lit(1u32) - lit(2u32)),
+            "UInt32(1) - UInt32(2)"
+        );
+        assert_eq!(
+            format!("{:?}", lit(1u32) * lit(2u32)),
+            "UInt32(1) * UInt32(2)"
+        );
+        assert_eq!(
+            format!("{:?}", lit(1u32) / lit(2u32)),
+            "UInt32(1) / UInt32(2)"
+        );
+        assert_eq!(
+            format!("{:?}", lit(1u32) % lit(2u32)),
+            "UInt32(1) % UInt32(2)"
+        );
+    }
+}
diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs
index de05298..ba853ee 100644
--- a/datafusion/src/logical_plan/expr.rs
+++ b/datafusion/src/logical_plan/expr.rs
@@ -19,15 +19,17 @@
 //! such as `col = 5` or `SUM(col)`. See examples on the [`Expr`] struct.
 
 pub use super::Operator;
-use crate::error::{DataFusionError, Result};
+use crate::error::Result;
 use crate::logical_plan::ExprSchemable;
 use crate::logical_plan::{DFField, DFSchema};
 use crate::physical_plan::udaf::AggregateUDF;
-use crate::physical_plan::{aggregates, functions, udf::ScalarUDF};
+use crate::physical_plan::udf::ScalarUDF;
 use arrow::datatypes::DataType;
+use datafusion_common::DataFusionError;
 pub use datafusion_common::{Column, ExprSchema};
-pub use datafusion_expr::expr_fn::col;
+pub use datafusion_expr::expr_fn::*;
 use datafusion_expr::AccumulatorFunctionImplementation;
+use datafusion_expr::BuiltinScalarFunction;
 pub use datafusion_expr::Expr;
 use datafusion_expr::StateTypeFunction;
 pub use datafusion_expr::{lit, lit_timestamp_nano, Literal};
@@ -64,9 +66,7 @@ impl CaseBuilder {
     pub fn end(&self) -> Result<Expr> {
         self.build()
     }
-}
 
-impl CaseBuilder {
     fn build(&self) -> Result<Expr> {
         // collect all "then" expressions
         let mut then_expr = self.then_expr.clone();
@@ -127,15 +127,6 @@ pub fn when(when: Expr, then: Expr) -> CaseBuilder {
     }
 }
 
-/// return a new expression with a logical AND
-pub fn and(left: Expr, right: Expr) -> Expr {
-    Expr::BinaryExpr {
-        left: Box::new(left),
-        op: Operator::And,
-        right: Box::new(right),
-    }
-}
-
 /// Combines an array of filter expressions into a single filter expression
 /// consisting of the input filter expressions joined with logical AND.
 /// Returns None if the filters array is empty.
@@ -150,15 +141,6 @@ pub fn combine_filters(filters: &[Expr]) -> Option<Expr> {
     Some(combined_filter)
 }
 
-/// return a new expression with a logical OR
-pub fn or(left: Expr, right: Expr) -> Expr {
-    Expr::BinaryExpr {
-        left: Box::new(left),
-        op: Operator::Or,
-        right: Box::new(right),
-    }
-}
-
 /// Convert an expression into Column expression if it's already provided as input plan.
 ///
 /// For example, it rewrites:
@@ -200,230 +182,6 @@ pub fn unalias(expr: Expr) -> Expr {
     }
 }
 
-/// Create an expression to represent the min() aggregate function
-pub fn min(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Min,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the max() aggregate function
-pub fn max(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Max,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the sum() aggregate function
-pub fn sum(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Sum,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the avg() aggregate function
-pub fn avg(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Avg,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the count() aggregate function
-pub fn count(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Count,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Create an expression to represent the count(distinct) aggregate function
-pub fn count_distinct(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::Count,
-        distinct: true,
-        args: vec![expr],
-    }
-}
-
-/// Create an in_list expression
-pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
-    Expr::InList {
-        expr: Box::new(expr),
-        list,
-        negated,
-    }
-}
-
-/// Concatenates the text representations of all the arguments. NULL arguments are ignored.
-pub fn concat(args: &[Expr]) -> Expr {
-    Expr::ScalarFunction {
-        fun: functions::BuiltinScalarFunction::Concat,
-        args: args.to_vec(),
-    }
-}
-
-/// Concatenates all but the first argument, with separators.
-/// The first argument is used as the separator string, and should not be NULL.
-/// Other NULL arguments are ignored.
-pub fn concat_ws(sep: impl Into<String>, values: &[Expr]) -> Expr {
-    let mut args = vec![lit(sep.into())];
-    args.extend_from_slice(values);
-    Expr::ScalarFunction {
-        fun: functions::BuiltinScalarFunction::ConcatWithSeparator,
-        args,
-    }
-}
-
-/// Returns a random value in the range 0.0 <= x < 1.0
-pub fn random() -> Expr {
-    Expr::ScalarFunction {
-        fun: functions::BuiltinScalarFunction::Random,
-        args: vec![],
-    }
-}
-
-/// Returns the approximate number of distinct input values.
-/// This function provides an approximation of count(DISTINCT x).
-/// Zero is returned if all input values are null.
-/// This function should produce a standard error of 0.81%,
-/// which is the standard deviation of the (approximately normal)
-/// error distribution over all possible sets.
-/// It does not guarantee an upper bound on the error for any specific input set.
-pub fn approx_distinct(expr: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::ApproxDistinct,
-        distinct: false,
-        args: vec![expr],
-    }
-}
-
-/// Calculate an approximation of the specified `percentile` for `expr`.
-pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr {
-    Expr::AggregateFunction {
-        fun: aggregates::AggregateFunction::ApproxPercentileCont,
-        distinct: false,
-        args: vec![expr, percentile],
-    }
-}
-
-// TODO(kszucs): this seems buggy, unary_scalar_expr! is used for many
-// varying arity functions
-/// Create an convenience function representing a unary scalar function
-macro_rules! unary_scalar_expr {
-    ($ENUM:ident, $FUNC:ident) => {
-        #[doc = concat!("Unary scalar function definition for ", stringify!($FUNC) ) ]
-        pub fn $FUNC(e: Expr) -> Expr {
-            Expr::ScalarFunction {
-                fun: functions::BuiltinScalarFunction::$ENUM,
-                args: vec![e],
-            }
-        }
-    };
-}
-
-macro_rules! scalar_expr {
-    ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
-        #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
-        pub fn $FUNC($($arg: Expr),*) -> Expr {
-            Expr::ScalarFunction {
-                fun: functions::BuiltinScalarFunction::$ENUM,
-                args: vec![$($arg),*],
-            }
-        }
-    };
-}
-
-macro_rules! nary_scalar_expr {
-    ($ENUM:ident, $FUNC:ident) => {
-        #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ]
-        pub fn $FUNC(args: Vec<Expr>) -> Expr {
-            Expr::ScalarFunction {
-                fun: functions::BuiltinScalarFunction::$ENUM,
-                args,
-            }
-        }
-    };
-}
-
-// generate methods for creating the supported unary/binary expressions
-
-// math functions
-unary_scalar_expr!(Sqrt, sqrt);
-unary_scalar_expr!(Sin, sin);
-unary_scalar_expr!(Cos, cos);
-unary_scalar_expr!(Tan, tan);
-unary_scalar_expr!(Asin, asin);
-unary_scalar_expr!(Acos, acos);
-unary_scalar_expr!(Atan, atan);
-unary_scalar_expr!(Floor, floor);
-unary_scalar_expr!(Ceil, ceil);
-unary_scalar_expr!(Now, now);
-unary_scalar_expr!(Round, round);
-unary_scalar_expr!(Trunc, trunc);
-unary_scalar_expr!(Abs, abs);
-unary_scalar_expr!(Signum, signum);
-unary_scalar_expr!(Exp, exp);
-unary_scalar_expr!(Log2, log2);
-unary_scalar_expr!(Log10, log10);
-unary_scalar_expr!(Ln, ln);
-
-// string functions
-scalar_expr!(Ascii, ascii, string);
-scalar_expr!(BitLength, bit_length, string);
-nary_scalar_expr!(Btrim, btrim);
-scalar_expr!(CharacterLength, character_length, string);
-scalar_expr!(CharacterLength, length, string);
-scalar_expr!(Chr, chr, string);
-scalar_expr!(Digest, digest, string, algorithm);
-scalar_expr!(InitCap, initcap, string);
-scalar_expr!(Left, left, string, count);
-scalar_expr!(Lower, lower, string);
-nary_scalar_expr!(Lpad, lpad);
-scalar_expr!(Ltrim, ltrim, string);
-scalar_expr!(MD5, md5, string);
-scalar_expr!(OctetLength, octet_length, string);
-nary_scalar_expr!(RegexpMatch, regexp_match);
-nary_scalar_expr!(RegexpReplace, regexp_replace);
-scalar_expr!(Replace, replace, string, from, to);
-scalar_expr!(Repeat, repeat, string, count);
-scalar_expr!(Reverse, reverse, string);
-scalar_expr!(Right, right, string, count);
-nary_scalar_expr!(Rpad, rpad);
-scalar_expr!(Rtrim, rtrim, string);
-scalar_expr!(SHA224, sha224, string);
-scalar_expr!(SHA256, sha256, string);
-scalar_expr!(SHA384, sha384, string);
-scalar_expr!(SHA512, sha512, string);
-scalar_expr!(SplitPart, split_part, expr, delimiter, index);
-scalar_expr!(StartsWith, starts_with, string, characters);
-scalar_expr!(Strpos, strpos, string, substring);
-scalar_expr!(Substr, substr, string, position);
-scalar_expr!(ToHex, to_hex, string);
-scalar_expr!(Translate, translate, string, from, to);
-scalar_expr!(Trim, trim, string);
-scalar_expr!(Upper, upper, string);
-
-// date functions
-scalar_expr!(DatePart, date_part, part, date);
-scalar_expr!(DateTrunc, date_trunc, part, date);
-
-/// returns an array of fixed size with each argument on it.
-pub fn array(args: Vec<Expr>) -> Expr {
-    Expr::ScalarFunction {
-        fun: functions::BuiltinScalarFunction::Array,
-        args,
-    }
-}
-
 /// Creates a new UDF with a specific signature and specific return type.
 /// This is a helper function to create a new UDF.
 /// The function `create_udf` returns a subset of all possible `ScalarFunction`:
@@ -483,7 +241,7 @@ pub fn exprlist_to_fields<'a>(
 /// let expr = call_fn("sin", vec![col("x")]).unwrap().lt(lit(0.2));
 /// ```
 pub fn call_fn(name: impl AsRef<str>, args: Vec<Expr>) -> Result<Expr> {
-    match name.as_ref().parse::<functions::BuiltinScalarFunction>() {
+    match name.as_ref().parse::<BuiltinScalarFunction>() {
         Ok(fun) => Ok(Expr::ScalarFunction { fun, args }),
         Err(e) => Err(e),
     }
@@ -512,75 +270,9 @@ mod tests {
     }
 
     #[test]
-    fn filter_is_null_and_is_not_null() {
-        let col_null = col("col1");
-        let col_not_null = col("col2");
-        assert_eq!(format!("{:?}", col_null.is_null()), "#col1 IS NULL");
-        assert_eq!(
-            format!("{:?}", col_not_null.is_not_null()),
-            "#col2 IS NOT NULL"
-        );
-    }
-
-    #[test]
-    fn test_not() {
-        assert_eq!(lit(1).not(), !lit(1));
-    }
-
-    macro_rules! test_unary_scalar_expr {
-        ($ENUM:ident, $FUNC:ident) => {{
-            if let Expr::ScalarFunction { fun, args } = $FUNC(col("tableA.a")) {
-                let name = functions::BuiltinScalarFunction::$ENUM;
-                assert_eq!(name, fun);
-                assert_eq!(1, args.len());
-            } else {
-                assert!(false, "unexpected");
-            }
-        }};
-    }
-
-    macro_rules! test_scalar_expr {
-        ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
-            let expected = vec![$(stringify!($arg)),*];
-            let result = $FUNC(
-                $(
-                    col(stringify!($arg.to_string()))
-                ),*
-            );
-            if let Expr::ScalarFunction { fun, args } = result {
-                let name = functions::BuiltinScalarFunction::$ENUM;
-                assert_eq!(name, fun);
-                assert_eq!(expected.len(), args.len());
-            } else {
-                assert!(false, "unexpected: {:?}", result);
-            }
-        };
-    }
-
-    macro_rules! test_nary_scalar_expr {
-        ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => {
-            let expected = vec![$(stringify!($arg)),*];
-            let result = $FUNC(
-                vec![
-                    $(
-                        col(stringify!($arg.to_string()))
-                    ),*
-                ]
-            );
-            if let Expr::ScalarFunction { fun, args } = result {
-                let name = functions::BuiltinScalarFunction::$ENUM;
-                assert_eq!(name, fun);
-                assert_eq!(expected.len(), args.len());
-            } else {
-                assert!(false, "unexpected: {:?}", result);
-            }
-        };
-    }
-
-    #[test]
     fn digest_function_definitions() {
         if let Expr::ScalarFunction { fun, args } = digest(col("tableA.a"), lit("md5")) {
-            let name = functions::BuiltinScalarFunction::Digest;
+            let name = BuiltinScalarFunction::Digest;
             assert_eq!(name, fun);
             assert_eq!(2, args.len());
         } else {
@@ -589,98 +281,6 @@ mod tests {
     }
 
     #[test]
-    fn scalar_function_definitions() {
-        test_unary_scalar_expr!(Sqrt, sqrt);
-        test_unary_scalar_expr!(Sin, sin);
-        test_unary_scalar_expr!(Cos, cos);
-        test_unary_scalar_expr!(Tan, tan);
-        test_unary_scalar_expr!(Asin, asin);
-        test_unary_scalar_expr!(Acos, acos);
-        test_unary_scalar_expr!(Atan, atan);
-        test_unary_scalar_expr!(Floor, floor);
-        test_unary_scalar_expr!(Ceil, ceil);
-        test_unary_scalar_expr!(Now, now);
-        test_unary_scalar_expr!(Round, round);
-        test_unary_scalar_expr!(Trunc, trunc);
-        test_unary_scalar_expr!(Abs, abs);
-        test_unary_scalar_expr!(Signum, signum);
-        test_unary_scalar_expr!(Exp, exp);
-        test_unary_scalar_expr!(Log2, log2);
-        test_unary_scalar_expr!(Log10, log10);
-        test_unary_scalar_expr!(Ln, ln);
-
-        test_scalar_expr!(Ascii, ascii, input);
-        test_scalar_expr!(BitLength, bit_length, string);
-        test_nary_scalar_expr!(Btrim, btrim, string);
-        test_nary_scalar_expr!(Btrim, btrim, string, characters);
-        test_scalar_expr!(CharacterLength, character_length, string);
-        test_scalar_expr!(CharacterLength, length, string);
-        test_scalar_expr!(Chr, chr, string);
-        test_scalar_expr!(Digest, digest, string, algorithm);
-        test_scalar_expr!(InitCap, initcap, string);
-        test_scalar_expr!(Left, left, string, count);
-        test_scalar_expr!(Lower, lower, string);
-        test_nary_scalar_expr!(Lpad, lpad, string, count);
-        test_nary_scalar_expr!(Lpad, lpad, string, count, characters);
-        test_scalar_expr!(Ltrim, ltrim, string);
-        test_scalar_expr!(MD5, md5, string);
-        test_scalar_expr!(OctetLength, octet_length, string);
-        test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern);
-        test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern, flags);
-        test_nary_scalar_expr!(
-            RegexpReplace,
-            regexp_replace,
-            string,
-            pattern,
-            replacement
-        );
-        test_nary_scalar_expr!(
-            RegexpReplace,
-            regexp_replace,
-            string,
-            pattern,
-            replacement,
-            flags
-        );
-        test_scalar_expr!(Replace, replace, string, from, to);
-        test_scalar_expr!(Repeat, repeat, string, count);
-        test_scalar_expr!(Reverse, reverse, string);
-        test_scalar_expr!(Right, right, string, count);
-        test_nary_scalar_expr!(Rpad, rpad, string, count);
-        test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
-        test_scalar_expr!(Rtrim, rtrim, string);
-        test_scalar_expr!(SHA224, sha224, string);
-        test_scalar_expr!(SHA256, sha256, string);
-        test_scalar_expr!(SHA384, sha384, string);
-        test_scalar_expr!(SHA512, sha512, string);
-        test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
-        test_scalar_expr!(StartsWith, starts_with, string, characters);
-        test_scalar_expr!(Strpos, strpos, string, substring);
-        test_scalar_expr!(Substr, substr, string, position);
-        test_scalar_expr!(ToHex, to_hex, string);
-        test_scalar_expr!(Translate, translate, string, from, to);
-        test_scalar_expr!(Trim, trim, string);
-        test_scalar_expr!(Upper, upper, string);
-
-        test_scalar_expr!(DatePart, date_part, part, date);
-        test_scalar_expr!(DateTrunc, date_trunc, part, date);
-    }
-
-    #[test]
-    fn test_partial_ord() {
-        // Test validates that partial ord is defined for Expr using hashes, not
-        // intended to exhaustively test all possibilities
-        let exp1 = col("a") + lit(1);
-        let exp2 = col("a") + lit(2);
-        let exp3 = !(col("a") + lit(2));
-
-        assert!(exp1 < exp2);
-        assert!(exp2 > exp1);
-        assert!(exp2 > exp3);
-        assert!(exp3 < exp2);
-    }
-
-    #[test]
     fn combine_zero_filters() {
         let result = combine_filters(&[]);
         assert_eq!(result, None);
diff --git a/datafusion/src/logical_plan/operators.rs b/datafusion/src/logical_plan/operators.rs
index 2f12928..132f8a8 100644
--- a/datafusion/src/logical_plan/operators.rs
+++ b/datafusion/src/logical_plan/operators.rs
@@ -16,32 +16,3 @@
 // under the License.
 
 pub use datafusion_expr::Operator;
-
-#[cfg(test)]
-mod tests {
-    use crate::prelude::lit;
-
-    #[test]
-    fn test_operators() {
-        assert_eq!(
-            format!("{:?}", lit(1u32) + lit(2u32)),
-            "UInt32(1) + UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) - lit(2u32)),
-            "UInt32(1) - UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) * lit(2u32)),
-            "UInt32(1) * UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) / lit(2u32)),
-            "UInt32(1) / UInt32(2)"
-        );
-        assert_eq!(
-            format!("{:?}", lit(1u32) % lit(2u32)),
-            "UInt32(1) % UInt32(2)"
-        );
-    }
-}

[arrow-datafusion] 03/03: remove reference to logical plan in physical plan

Posted by ji...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch physical-plan
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 6f86fb95e9a0226912ac8df0e211a05979141910
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Wed Feb 9 18:07:30 2022 +0800

    remove reference to logical plan in physical plan
---
 datafusion/src/physical_plan/coercion_rule/binary_rule.rs |  4 ++--
 datafusion/src/physical_plan/expressions/binary.rs        |  2 +-
 datafusion/src/physical_plan/expressions/case.rs          |  2 +-
 datafusion/src/physical_plan/file_format/parquet.rs       | 13 +++++++------
 datafusion/src/physical_plan/filter.rs                    |  3 ++-
 datafusion/src/physical_plan/planner.rs                   |  7 ++++---
 datafusion/src/physical_plan/windows/aggregate.rs         |  2 +-
 datafusion/src/physical_plan/windows/mod.rs               |  2 +-
 8 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/datafusion/src/physical_plan/coercion_rule/binary_rule.rs b/datafusion/src/physical_plan/coercion_rule/binary_rule.rs
index 426d59f..7d4dd55 100644
--- a/datafusion/src/physical_plan/coercion_rule/binary_rule.rs
+++ b/datafusion/src/physical_plan/coercion_rule/binary_rule.rs
@@ -19,8 +19,8 @@
 
 use crate::arrow::datatypes::DataType;
 use crate::error::{DataFusionError, Result};
-use crate::logical_plan::Operator;
 use crate::scalar::{MAX_PRECISION_FOR_DECIMAL128, MAX_SCALE_FOR_DECIMAL128};
+use datafusion_expr::Operator;
 
 /// Coercion rules for all binary operators. Returns the output type
 /// of applying `op` to an argument of `lhs_type` and `rhs_type`.
@@ -494,7 +494,7 @@ mod tests {
     use super::*;
     use crate::arrow::datatypes::DataType;
     use crate::error::{DataFusionError, Result};
-    use crate::logical_plan::Operator;
+    use datafusion_expr::Operator;
 
     #[test]
 
diff --git a/datafusion/src/physical_plan/expressions/binary.rs b/datafusion/src/physical_plan/expressions/binary.rs
index d1fc3bc..9f007a2 100644
--- a/datafusion/src/physical_plan/expressions/binary.rs
+++ b/datafusion/src/physical_plan/expressions/binary.rs
@@ -59,11 +59,11 @@ use arrow::error::ArrowError::DivideByZero;
 use arrow::record_batch::RecordBatch;
 
 use crate::error::{DataFusionError, Result};
-use crate::logical_plan::Operator;
 use crate::physical_plan::coercion_rule::binary_rule::coerce_types;
 use crate::physical_plan::expressions::try_cast;
 use crate::physical_plan::{ColumnarValue, PhysicalExpr};
 use crate::scalar::ScalarValue;
+use datafusion_expr::Operator;
 
 // Simple (low performance) kernels until optimized kernels are added to arrow
 // See https://github.com/apache/arrow-rs/issues/960
diff --git a/datafusion/src/physical_plan/expressions/case.rs b/datafusion/src/physical_plan/expressions/case.rs
index 2a680d3..d990d74 100644
--- a/datafusion/src/physical_plan/expressions/case.rs
+++ b/datafusion/src/physical_plan/expressions/case.rs
@@ -456,12 +456,12 @@ mod tests {
     use super::*;
     use crate::{
         error::Result,
-        logical_plan::Operator,
         physical_plan::expressions::{binary, col, lit},
         scalar::ScalarValue,
     };
     use arrow::array::StringArray;
     use arrow::datatypes::*;
+    use datafusion_expr::Operator;
 
     #[test]
     fn case_with_expr() -> Result<()> {
diff --git a/datafusion/src/physical_plan/file_format/parquet.rs b/datafusion/src/physical_plan/file_format/parquet.rs
index 40acf5a..0f4255e 100644
--- a/datafusion/src/physical_plan/file_format/parquet.rs
+++ b/datafusion/src/physical_plan/file_format/parquet.rs
@@ -26,7 +26,6 @@ use crate::datasource::object_store::ObjectStore;
 use crate::datasource::PartitionedFile;
 use crate::{
     error::{DataFusionError, Result},
-    logical_plan::{Column, Expr},
     physical_optimizer::pruning::{PruningPredicate, PruningStatistics},
     physical_plan::{
         file_format::FileScanConfig,
@@ -37,6 +36,8 @@ use crate::{
     },
     scalar::ScalarValue,
 };
+use datafusion_common::Column;
+use datafusion_expr::Expr;
 
 use arrow::{
     array::ArrayRef,
@@ -919,7 +920,7 @@ mod tests {
 
     #[test]
     fn row_group_pruning_predicate_simple_expr() -> Result<()> {
-        use crate::logical_plan::{col, lit};
+        use datafusion_expr::{col, lit};
         // int > 1 => c1_max > 1
         let expr = col("c1").gt(lit(15));
         let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
@@ -952,7 +953,7 @@ mod tests {
 
     #[test]
     fn row_group_pruning_predicate_missing_stats() -> Result<()> {
-        use crate::logical_plan::{col, lit};
+        use datafusion_expr::{col, lit};
         // int > 1 => c1_max > 1
         let expr = col("c1").gt(lit(15));
         let schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]);
@@ -987,7 +988,7 @@ mod tests {
 
     #[test]
     fn row_group_pruning_predicate_partial_expr() -> Result<()> {
-        use crate::logical_plan::{col, lit};
+        use datafusion_expr::{col, lit};
         // test row group predicate with partially supported expression
         // int > 1 and int % 2 => c1_max > 1 and true
         let expr = col("c1").gt(lit(15)).and(col("c2").modulus(lit(2)));
@@ -1073,7 +1074,7 @@ mod tests {
 
     #[test]
     fn row_group_pruning_predicate_null_expr() -> Result<()> {
-        use crate::logical_plan::{col, lit};
+        use datafusion_expr::{col, lit};
         // int > 1 and IsNull(bool) => c1_max > 1 and bool_null_count > 0
         let expr = col("c1").gt(lit(15)).and(col("c2").is_null());
         let schema = Arc::new(Schema::new(vec![
@@ -1101,7 +1102,7 @@ mod tests {
 
     #[test]
     fn row_group_pruning_predicate_eq_null_expr() -> Result<()> {
-        use crate::logical_plan::{col, lit};
+        use datafusion_expr::{col, lit};
         // test row group predicate with an unknown (Null) expr
         //
         // int > 1 and bool = NULL => c1_max > 1 and null
diff --git a/datafusion/src/physical_plan/filter.rs b/datafusion/src/physical_plan/filter.rs
index a48d112..b600b1b 100644
--- a/datafusion/src/physical_plan/filter.rs
+++ b/datafusion/src/physical_plan/filter.rs
@@ -228,13 +228,14 @@ mod tests {
 
     use super::*;
     use crate::datasource::object_store::local::LocalFileSystem;
+    use crate::physical_plan::collect;
     use crate::physical_plan::expressions::*;
     use crate::physical_plan::file_format::{CsvExec, FileScanConfig};
     use crate::physical_plan::ExecutionPlan;
     use crate::scalar::ScalarValue;
     use crate::test;
     use crate::test_util;
-    use crate::{logical_plan::Operator, physical_plan::collect};
+    use datafusion_expr::Operator;
     use std::iter::Iterator;
 
     #[tokio::test]
diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs
index bf8be3d..ee5266d 100644
--- a/datafusion/src/physical_plan/planner.rs
+++ b/datafusion/src/physical_plan/planner.rs
@@ -1430,17 +1430,18 @@ mod tests {
     use crate::execution::options::CsvReadOptions;
     use crate::execution::runtime_env::RuntimeEnv;
     use crate::logical_plan::plan::Extension;
-    use crate::logical_plan::{DFField, DFSchema, DFSchemaRef};
     use crate::physical_plan::{
         expressions, DisplayFormatType, Partitioning, Statistics,
     };
     use crate::scalar::ScalarValue;
     use crate::{
-        logical_plan::{col, lit, sum, LogicalPlanBuilder},
-        physical_plan::SendableRecordBatchStream,
+        logical_plan::LogicalPlanBuilder, physical_plan::SendableRecordBatchStream,
     };
     use arrow::datatypes::{DataType, Field, SchemaRef};
     use async_trait::async_trait;
+    use datafusion_common::{DFField, DFSchema, DFSchemaRef};
+    use datafusion_expr::sum;
+    use datafusion_expr::{col, lit};
     use fmt::Debug;
     use std::convert::TryFrom;
     use std::{any::Any, fmt};
diff --git a/datafusion/src/physical_plan/windows/aggregate.rs b/datafusion/src/physical_plan/windows/aggregate.rs
index f7c29ba..4c97e2b 100644
--- a/datafusion/src/physical_plan/windows/aggregate.rs
+++ b/datafusion/src/physical_plan/windows/aggregate.rs
@@ -18,7 +18,6 @@
 //! Physical exec for aggregate window function expressions.
 
 use crate::error::{DataFusionError, Result};
-use crate::logical_plan::window_frames::{WindowFrame, WindowFrameUnits};
 use crate::physical_plan::windows::find_ranges_in_range;
 use crate::physical_plan::{
     expressions::PhysicalSortExpr, Accumulator, AggregateExpr, PhysicalExpr, WindowExpr,
@@ -26,6 +25,7 @@ use crate::physical_plan::{
 use arrow::compute::concat;
 use arrow::record_batch::RecordBatch;
 use arrow::{array::ArrayRef, datatypes::Field};
+use datafusion_expr::{WindowFrame, WindowFrameUnits};
 use std::any::Any;
 use std::iter::IntoIterator;
 use std::ops::Range;
diff --git a/datafusion/src/physical_plan/windows/mod.rs b/datafusion/src/physical_plan/windows/mod.rs
index 243c571..b3bf9ce 100644
--- a/datafusion/src/physical_plan/windows/mod.rs
+++ b/datafusion/src/physical_plan/windows/mod.rs
@@ -18,7 +18,6 @@
 //! Physical expressions for window functions
 
 use crate::error::{DataFusionError, Result};
-use crate::logical_plan::window_frames::WindowFrame;
 use crate::physical_plan::{
     aggregates,
     expressions::{
@@ -34,6 +33,7 @@ use crate::physical_plan::{
 };
 use crate::scalar::ScalarValue;
 use arrow::datatypes::Schema;
+use datafusion_expr::WindowFrame;
 use std::convert::TryInto;
 use std::ops::Range;
 use std::sync::Arc;