You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/07 15:37:15 UTC
[arrow-datafusion] 01/01: split up expr for rewriting, visiting, and simplification
This is an automated email from the ASF dual-hosted git repository.
jiayuliu pushed a commit to branch expr-split
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
commit 0d90e5bf7dcd2d92f5579d282ed1b7b6cc8755c3
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Mon Feb 7 23:36:58 2022 +0800
split up expr for rewriting, visiting, and simplification
---
datafusion/src/datasource/listing/helpers.rs | 2 +-
datafusion/src/logical_plan/expr.rs | 262 +++++++++++----------
datafusion/src/logical_plan/mod.rs | 1 +
.../src/optimizer/common_subexpr_eliminate.rs | 4 +-
datafusion/src/optimizer/simplify_expressions.rs | 4 +-
datafusion/src/optimizer/utils.rs | 6 +-
datafusion/src/sql/utils.rs | 1 +
7 files changed, 151 insertions(+), 129 deletions(-)
diff --git a/datafusion/src/datasource/listing/helpers.rs b/datafusion/src/datasource/listing/helpers.rs
index 912179c..8ff8210 100644
--- a/datafusion/src/datasource/listing/helpers.rs
+++ b/datafusion/src/datasource/listing/helpers.rs
@@ -37,7 +37,7 @@ use log::debug;
use crate::{
error::Result,
execution::context::ExecutionContext,
- logical_plan::{self, Expr, ExpressionVisitor, Recursion},
+ logical_plan::{self, Expr, ExprVisitable, ExpressionVisitor, Recursion},
physical_plan::functions::Volatility,
scalar::ScalarValue,
};
diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs
index 4b539a8..7b0599f 100644
--- a/datafusion/src/logical_plan/expr.rs
+++ b/datafusion/src/logical_plan/expr.rs
@@ -557,128 +557,13 @@ impl Expr {
nulls_first,
}
}
+}
- /// Performs a depth first walk of an expression and
- /// its children, calling [`ExpressionVisitor::pre_visit`] and
- /// `visitor.post_visit`.
- ///
- /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to
- /// separate expression algorithms from the structure of the
- /// `Expr` tree and make it easier to add new types of expressions
- /// and algorithms that walk the tree.
- ///
- /// For an expression tree such as
- /// ```text
- /// BinaryExpr (GT)
- /// left: Column("foo")
- /// right: Column("bar")
- /// ```
- ///
- /// The nodes are visited using the following order
- /// ```text
- /// pre_visit(BinaryExpr(GT))
- /// pre_visit(Column("foo"))
- /// pre_visit(Column("bar"))
- /// post_visit(Column("bar"))
- /// post_visit(Column("bar"))
- /// post_visit(BinaryExpr(GT))
- /// ```
- ///
- /// If an Err result is returned, recursion is stopped immediately
- ///
- /// If `Recursion::Stop` is returned on a call to pre_visit, no
- /// children of that expression are visited, nor is post_visit
- /// called on that expression
- ///
- pub fn accept<V: ExpressionVisitor>(&self, visitor: V) -> Result<V> {
- let visitor = match visitor.pre_visit(self)? {
- Recursion::Continue(visitor) => visitor,
- // If the recursion should stop, do not visit children
- Recursion::Stop(visitor) => return Ok(visitor),
- };
-
- // recurse (and cover all expression types)
- let visitor = match self {
- Expr::Alias(expr, _)
- | Expr::Not(expr)
- | Expr::IsNotNull(expr)
- | Expr::IsNull(expr)
- | Expr::Negative(expr)
- | Expr::Cast { expr, .. }
- | Expr::TryCast { expr, .. }
- | Expr::Sort { expr, .. }
- | Expr::GetIndexedField { expr, .. } => expr.accept(visitor),
- Expr::Column(_)
- | Expr::ScalarVariable(_)
- | Expr::Literal(_)
- | Expr::Wildcard => Ok(visitor),
- Expr::BinaryExpr { left, right, .. } => {
- let visitor = left.accept(visitor)?;
- right.accept(visitor)
- }
- Expr::Between {
- expr, low, high, ..
- } => {
- let visitor = expr.accept(visitor)?;
- let visitor = low.accept(visitor)?;
- high.accept(visitor)
- }
- Expr::Case {
- expr,
- when_then_expr,
- else_expr,
- } => {
- let visitor = if let Some(expr) = expr.as_ref() {
- expr.accept(visitor)
- } else {
- Ok(visitor)
- }?;
- let visitor = when_then_expr.iter().try_fold(
- visitor,
- |visitor, (when, then)| {
- let visitor = when.accept(visitor)?;
- then.accept(visitor)
- },
- )?;
- if let Some(else_expr) = else_expr.as_ref() {
- else_expr.accept(visitor)
- } else {
- Ok(visitor)
- }
- }
- Expr::ScalarFunction { args, .. }
- | Expr::ScalarUDF { args, .. }
- | Expr::AggregateFunction { args, .. }
- | Expr::AggregateUDF { args, .. } => args
- .iter()
- .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
- Expr::WindowFunction {
- args,
- partition_by,
- order_by,
- ..
- } => {
- let visitor = args
- .iter()
- .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
- let visitor = partition_by
- .iter()
- .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
- let visitor = order_by
- .iter()
- .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
- Ok(visitor)
- }
- Expr::InList { expr, list, .. } => {
- let visitor = expr.accept(visitor)?;
- list.iter()
- .try_fold(visitor, |visitor, arg| arg.accept(visitor))
- }
- }?;
-
- visitor.post_visit(self)
- }
+pub(crate) trait ExprRewritable: Sized {
+ fn rewrite<R: ExprRewriter>(self, rewriter: &mut R) -> Result<Self>;
+}
+impl ExprRewritable for Expr {
/// Performs a depth first walk of an expression and its children
/// to rewrite an expression, consuming `self` producing a new
/// [`Expr`].
@@ -712,7 +597,7 @@ impl Expr {
/// children of that expression are visited, nor is mutate
/// called on that expression
///
- pub fn rewrite<R>(self, rewriter: &mut R) -> Result<Self>
+ fn rewrite<R>(self, rewriter: &mut R) -> Result<Self>
where
R: ExprRewriter,
{
@@ -847,7 +732,140 @@ impl Expr {
Ok(expr)
}
}
+}
+
+pub(crate) trait ExprVisitable {
+ fn accept<V: ExpressionVisitor>(&self, visitor: V) -> Result<V>;
+}
+
+impl ExprVisitable for Expr {
+ /// Performs a depth first walk of an expression and
+ /// its children, calling [`ExpressionVisitor::pre_visit`] and
+ /// `visitor.post_visit`.
+ ///
+ /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to
+ /// separate expression algorithms from the structure of the
+ /// `Expr` tree and make it easier to add new types of expressions
+ /// and algorithms that walk the tree.
+ ///
+ /// For an expression tree such as
+ /// ```text
+ /// BinaryExpr (GT)
+ /// left: Column("foo")
+ /// right: Column("bar")
+ /// ```
+ ///
+ /// The nodes are visited using the following order
+ /// ```text
+ /// pre_visit(BinaryExpr(GT))
+ /// pre_visit(Column("foo"))
+ /// pre_visit(Column("bar"))
+ /// post_visit(Column("bar"))
+ /// post_visit(Column("bar"))
+ /// post_visit(BinaryExpr(GT))
+ /// ```
+ ///
+ /// If an Err result is returned, recursion is stopped immediately
+ ///
+ /// If `Recursion::Stop` is returned on a call to pre_visit, no
+ /// children of that expression are visited, nor is post_visit
+ /// called on that expression
+ ///
+ fn accept<V: ExpressionVisitor>(&self, visitor: V) -> Result<V> {
+ let visitor = match visitor.pre_visit(self)? {
+ Recursion::Continue(visitor) => visitor,
+ // If the recursion should stop, do not visit children
+ Recursion::Stop(visitor) => return Ok(visitor),
+ };
+
+ // recurse (and cover all expression types)
+ let visitor = match self {
+ Expr::Alias(expr, _)
+ | Expr::Not(expr)
+ | Expr::IsNotNull(expr)
+ | Expr::IsNull(expr)
+ | Expr::Negative(expr)
+ | Expr::Cast { expr, .. }
+ | Expr::TryCast { expr, .. }
+ | Expr::Sort { expr, .. }
+ | Expr::GetIndexedField { expr, .. } => expr.accept(visitor),
+ Expr::Column(_)
+ | Expr::ScalarVariable(_)
+ | Expr::Literal(_)
+ | Expr::Wildcard => Ok(visitor),
+ Expr::BinaryExpr { left, right, .. } => {
+ let visitor = left.accept(visitor)?;
+ right.accept(visitor)
+ }
+ Expr::Between {
+ expr, low, high, ..
+ } => {
+ let visitor = expr.accept(visitor)?;
+ let visitor = low.accept(visitor)?;
+ high.accept(visitor)
+ }
+ Expr::Case {
+ expr,
+ when_then_expr,
+ else_expr,
+ } => {
+ let visitor = if let Some(expr) = expr.as_ref() {
+ expr.accept(visitor)
+ } else {
+ Ok(visitor)
+ }?;
+ let visitor = when_then_expr.iter().try_fold(
+ visitor,
+ |visitor, (when, then)| {
+ let visitor = when.accept(visitor)?;
+ then.accept(visitor)
+ },
+ )?;
+ if let Some(else_expr) = else_expr.as_ref() {
+ else_expr.accept(visitor)
+ } else {
+ Ok(visitor)
+ }
+ }
+ Expr::ScalarFunction { args, .. }
+ | Expr::ScalarUDF { args, .. }
+ | Expr::AggregateFunction { args, .. }
+ | Expr::AggregateUDF { args, .. } => args
+ .iter()
+ .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
+ Expr::WindowFunction {
+ args,
+ partition_by,
+ order_by,
+ ..
+ } => {
+ let visitor = args
+ .iter()
+ .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
+ let visitor = partition_by
+ .iter()
+ .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
+ let visitor = order_by
+ .iter()
+ .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
+ Ok(visitor)
+ }
+ Expr::InList { expr, list, .. } => {
+ let visitor = expr.accept(visitor)?;
+ list.iter()
+ .try_fold(visitor, |visitor, arg| arg.accept(visitor))
+ }
+ }?;
+
+ visitor.post_visit(self)
+ }
+}
+
+pub(crate) trait ExprSimplifiable: Sized {
+ fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self>;
+}
+impl ExprSimplifiable for Expr {
/// Simplifies this [`Expr`]`s as much as possible, evaluating
/// constants and applying algebraic simplifications
///
@@ -889,7 +907,7 @@ impl Expr {
/// let expr = expr.simplify(&Info::default()).unwrap();
/// assert_eq!(expr, b_lt_2);
/// ```
- pub fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self> {
+ fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self> {
let mut rewriter = Simplifier::new(info);
let mut const_evaluator = ConstEvaluator::new(info.execution_props());
diff --git a/datafusion/src/logical_plan/mod.rs b/datafusion/src/logical_plan/mod.rs
index ec1aea6..c67cef8 100644
--- a/datafusion/src/logical_plan/mod.rs
+++ b/datafusion/src/logical_plan/mod.rs
@@ -49,6 +49,7 @@ pub use expr::{
Column, Expr, ExprRewriter, ExprSchema, ExpressionVisitor, Literal, Recursion,
RewriteRecursion, SimplifyInfo,
};
+pub(crate) use expr::{ExprRewritable, ExprSimplifiable, ExprVisitable};
pub use extension::UserDefinedLogicalNode;
pub use operators::Operator;
pub use plan::{
diff --git a/datafusion/src/optimizer/common_subexpr_eliminate.rs b/datafusion/src/optimizer/common_subexpr_eliminate.rs
index 9470734..5c2219b 100644
--- a/datafusion/src/optimizer/common_subexpr_eliminate.rs
+++ b/datafusion/src/optimizer/common_subexpr_eliminate.rs
@@ -23,8 +23,8 @@ use crate::logical_plan::plan::{Filter, Projection, Window};
use crate::logical_plan::{
col,
plan::{Aggregate, Sort},
- DFField, DFSchema, Expr, ExprRewriter, ExpressionVisitor, LogicalPlan, Recursion,
- RewriteRecursion,
+ DFField, DFSchema, Expr, ExprRewritable, ExprRewriter, ExprVisitable,
+ ExpressionVisitor, LogicalPlan, Recursion, RewriteRecursion,
};
use crate::optimizer::optimizer::OptimizerRule;
use crate::optimizer::utils;
diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs
index 5f87542..2dbceb1 100644
--- a/datafusion/src/optimizer/simplify_expressions.rs
+++ b/datafusion/src/optimizer/simplify_expressions.rs
@@ -24,8 +24,8 @@ use arrow::record_batch::RecordBatch;
use crate::error::DataFusionError;
use crate::execution::context::ExecutionProps;
use crate::logical_plan::{
- lit, DFSchema, DFSchemaRef, Expr, ExprRewriter, LogicalPlan, RewriteRecursion,
- SimplifyInfo,
+ lit, DFSchema, DFSchemaRef, Expr, ExprRewritable, ExprRewriter, ExprSimplifiable,
+ LogicalPlan, RewriteRecursion, SimplifyInfo,
};
use crate::optimizer::optimizer::OptimizerRule;
use crate::optimizer::utils;
diff --git a/datafusion/src/optimizer/utils.rs b/datafusion/src/optimizer/utils.rs
index f7ab836..41d1e4b 100644
--- a/datafusion/src/optimizer/utils.rs
+++ b/datafusion/src/optimizer/utils.rs
@@ -22,9 +22,11 @@ use crate::execution::context::ExecutionProps;
use crate::logical_plan::plan::{
Aggregate, Analyze, Extension, Filter, Join, Projection, Sort, Window,
};
+
use crate::logical_plan::{
- build_join_schema, Column, CreateMemoryTable, DFSchemaRef, Expr, Limit, LogicalPlan,
- LogicalPlanBuilder, Operator, Partitioning, Recursion, Repartition, Union, Values,
+ build_join_schema, Column, CreateMemoryTable, DFSchemaRef, Expr, ExprVisitable,
+ Limit, LogicalPlan, LogicalPlanBuilder, Operator, Partitioning, Recursion,
+ Repartition, Union, Values,
};
use crate::prelude::lit;
use crate::scalar::ScalarValue;
diff --git a/datafusion/src/sql/utils.rs b/datafusion/src/sql/utils.rs
index d0cef0f..cbe40d6 100644
--- a/datafusion/src/sql/utils.rs
+++ b/datafusion/src/sql/utils.rs
@@ -20,6 +20,7 @@
use arrow::datatypes::DataType;
use sqlparser::ast::Ident;
+use crate::logical_plan::ExprVisitable;
use crate::logical_plan::{Expr, LogicalPlan};
use crate::scalar::{ScalarValue, MAX_PRECISION_FOR_DECIMAL128};
use crate::{