You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2022/02/07 15:37:14 UTC

[arrow-datafusion] branch expr-split created (now 0d90e5b)

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a change to branch expr-split
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git.


      at 0d90e5b  split up expr for rewriting, visiting, and simplification

This branch includes the following new commits:

     new 0d90e5b  split up expr for rewriting, visiting, and simplification

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[arrow-datafusion] 01/01: split up expr for rewriting, visiting, and simplification

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch expr-split
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 0d90e5bf7dcd2d92f5579d282ed1b7b6cc8755c3
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Mon Feb 7 23:36:58 2022 +0800

    split up expr for rewriting, visiting, and simplification
---
 datafusion/src/datasource/listing/helpers.rs       |   2 +-
 datafusion/src/logical_plan/expr.rs                | 262 +++++++++++----------
 datafusion/src/logical_plan/mod.rs                 |   1 +
 .../src/optimizer/common_subexpr_eliminate.rs      |   4 +-
 datafusion/src/optimizer/simplify_expressions.rs   |   4 +-
 datafusion/src/optimizer/utils.rs                  |   6 +-
 datafusion/src/sql/utils.rs                        |   1 +
 7 files changed, 151 insertions(+), 129 deletions(-)

diff --git a/datafusion/src/datasource/listing/helpers.rs b/datafusion/src/datasource/listing/helpers.rs
index 912179c..8ff8210 100644
--- a/datafusion/src/datasource/listing/helpers.rs
+++ b/datafusion/src/datasource/listing/helpers.rs
@@ -37,7 +37,7 @@ use log::debug;
 use crate::{
     error::Result,
     execution::context::ExecutionContext,
-    logical_plan::{self, Expr, ExpressionVisitor, Recursion},
+    logical_plan::{self, Expr, ExprVisitable, ExpressionVisitor, Recursion},
     physical_plan::functions::Volatility,
     scalar::ScalarValue,
 };
diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs
index 4b539a8..7b0599f 100644
--- a/datafusion/src/logical_plan/expr.rs
+++ b/datafusion/src/logical_plan/expr.rs
@@ -557,128 +557,13 @@ impl Expr {
             nulls_first,
         }
     }
+}
 
-    /// Performs a depth first walk of an expression and
-    /// its children, calling [`ExpressionVisitor::pre_visit`] and
-    /// `visitor.post_visit`.
-    ///
-    /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to
-    /// separate expression algorithms from the structure of the
-    /// `Expr` tree and make it easier to add new types of expressions
-    /// and algorithms that walk the tree.
-    ///
-    /// For an expression tree such as
-    /// ```text
-    /// BinaryExpr (GT)
-    ///    left: Column("foo")
-    ///    right: Column("bar")
-    /// ```
-    ///
-    /// The nodes are visited using the following order
-    /// ```text
-    /// pre_visit(BinaryExpr(GT))
-    /// pre_visit(Column("foo"))
-    /// pre_visit(Column("bar"))
-    /// post_visit(Column("bar"))
-    /// post_visit(Column("bar"))
-    /// post_visit(BinaryExpr(GT))
-    /// ```
-    ///
-    /// If an Err result is returned, recursion is stopped immediately
-    ///
-    /// If `Recursion::Stop` is returned on a call to pre_visit, no
-    /// children of that expression are visited, nor is post_visit
-    /// called on that expression
-    ///
-    pub fn accept<V: ExpressionVisitor>(&self, visitor: V) -> Result<V> {
-        let visitor = match visitor.pre_visit(self)? {
-            Recursion::Continue(visitor) => visitor,
-            // If the recursion should stop, do not visit children
-            Recursion::Stop(visitor) => return Ok(visitor),
-        };
-
-        // recurse (and cover all expression types)
-        let visitor = match self {
-            Expr::Alias(expr, _)
-            | Expr::Not(expr)
-            | Expr::IsNotNull(expr)
-            | Expr::IsNull(expr)
-            | Expr::Negative(expr)
-            | Expr::Cast { expr, .. }
-            | Expr::TryCast { expr, .. }
-            | Expr::Sort { expr, .. }
-            | Expr::GetIndexedField { expr, .. } => expr.accept(visitor),
-            Expr::Column(_)
-            | Expr::ScalarVariable(_)
-            | Expr::Literal(_)
-            | Expr::Wildcard => Ok(visitor),
-            Expr::BinaryExpr { left, right, .. } => {
-                let visitor = left.accept(visitor)?;
-                right.accept(visitor)
-            }
-            Expr::Between {
-                expr, low, high, ..
-            } => {
-                let visitor = expr.accept(visitor)?;
-                let visitor = low.accept(visitor)?;
-                high.accept(visitor)
-            }
-            Expr::Case {
-                expr,
-                when_then_expr,
-                else_expr,
-            } => {
-                let visitor = if let Some(expr) = expr.as_ref() {
-                    expr.accept(visitor)
-                } else {
-                    Ok(visitor)
-                }?;
-                let visitor = when_then_expr.iter().try_fold(
-                    visitor,
-                    |visitor, (when, then)| {
-                        let visitor = when.accept(visitor)?;
-                        then.accept(visitor)
-                    },
-                )?;
-                if let Some(else_expr) = else_expr.as_ref() {
-                    else_expr.accept(visitor)
-                } else {
-                    Ok(visitor)
-                }
-            }
-            Expr::ScalarFunction { args, .. }
-            | Expr::ScalarUDF { args, .. }
-            | Expr::AggregateFunction { args, .. }
-            | Expr::AggregateUDF { args, .. } => args
-                .iter()
-                .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
-            Expr::WindowFunction {
-                args,
-                partition_by,
-                order_by,
-                ..
-            } => {
-                let visitor = args
-                    .iter()
-                    .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
-                let visitor = partition_by
-                    .iter()
-                    .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
-                let visitor = order_by
-                    .iter()
-                    .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
-                Ok(visitor)
-            }
-            Expr::InList { expr, list, .. } => {
-                let visitor = expr.accept(visitor)?;
-                list.iter()
-                    .try_fold(visitor, |visitor, arg| arg.accept(visitor))
-            }
-        }?;
-
-        visitor.post_visit(self)
-    }
+pub(crate) trait ExprRewritable: Sized {
+    fn rewrite<R: ExprRewriter>(self, rewriter: &mut R) -> Result<Self>;
+}
 
+impl ExprRewritable for Expr {
     /// Performs a depth first walk of an expression and its children
     /// to rewrite an expression, consuming `self` producing a new
     /// [`Expr`].
@@ -712,7 +597,7 @@ impl Expr {
     /// children of that expression are visited, nor is mutate
     /// called on that expression
     ///
-    pub fn rewrite<R>(self, rewriter: &mut R) -> Result<Self>
+    fn rewrite<R>(self, rewriter: &mut R) -> Result<Self>
     where
         R: ExprRewriter,
     {
@@ -847,7 +732,140 @@ impl Expr {
             Ok(expr)
         }
     }
+}
+
+pub(crate) trait ExprVisitable {
+    fn accept<V: ExpressionVisitor>(&self, visitor: V) -> Result<V>;
+}
+
+impl ExprVisitable for Expr {
+    /// Performs a depth first walk of an expression and
+    /// its children, calling [`ExpressionVisitor::pre_visit`] and
+    /// `visitor.post_visit`.
+    ///
+    /// Implements the [visitor pattern](https://en.wikipedia.org/wiki/Visitor_pattern) to
+    /// separate expression algorithms from the structure of the
+    /// `Expr` tree and make it easier to add new types of expressions
+    /// and algorithms that walk the tree.
+    ///
+    /// For an expression tree such as
+    /// ```text
+    /// BinaryExpr (GT)
+    ///    left: Column("foo")
+    ///    right: Column("bar")
+    /// ```
+    ///
+    /// The nodes are visited using the following order
+    /// ```text
+    /// pre_visit(BinaryExpr(GT))
+    /// pre_visit(Column("foo"))
+    /// pre_visit(Column("bar"))
+    /// post_visit(Column("bar"))
+    /// post_visit(Column("bar"))
+    /// post_visit(BinaryExpr(GT))
+    /// ```
+    ///
+    /// If an Err result is returned, recursion is stopped immediately
+    ///
+    /// If `Recursion::Stop` is returned on a call to pre_visit, no
+    /// children of that expression are visited, nor is post_visit
+    /// called on that expression
+    ///
+    fn accept<V: ExpressionVisitor>(&self, visitor: V) -> Result<V> {
+        let visitor = match visitor.pre_visit(self)? {
+            Recursion::Continue(visitor) => visitor,
+            // If the recursion should stop, do not visit children
+            Recursion::Stop(visitor) => return Ok(visitor),
+        };
+
+        // recurse (and cover all expression types)
+        let visitor = match self {
+            Expr::Alias(expr, _)
+            | Expr::Not(expr)
+            | Expr::IsNotNull(expr)
+            | Expr::IsNull(expr)
+            | Expr::Negative(expr)
+            | Expr::Cast { expr, .. }
+            | Expr::TryCast { expr, .. }
+            | Expr::Sort { expr, .. }
+            | Expr::GetIndexedField { expr, .. } => expr.accept(visitor),
+            Expr::Column(_)
+            | Expr::ScalarVariable(_)
+            | Expr::Literal(_)
+            | Expr::Wildcard => Ok(visitor),
+            Expr::BinaryExpr { left, right, .. } => {
+                let visitor = left.accept(visitor)?;
+                right.accept(visitor)
+            }
+            Expr::Between {
+                expr, low, high, ..
+            } => {
+                let visitor = expr.accept(visitor)?;
+                let visitor = low.accept(visitor)?;
+                high.accept(visitor)
+            }
+            Expr::Case {
+                expr,
+                when_then_expr,
+                else_expr,
+            } => {
+                let visitor = if let Some(expr) = expr.as_ref() {
+                    expr.accept(visitor)
+                } else {
+                    Ok(visitor)
+                }?;
+                let visitor = when_then_expr.iter().try_fold(
+                    visitor,
+                    |visitor, (when, then)| {
+                        let visitor = when.accept(visitor)?;
+                        then.accept(visitor)
+                    },
+                )?;
+                if let Some(else_expr) = else_expr.as_ref() {
+                    else_expr.accept(visitor)
+                } else {
+                    Ok(visitor)
+                }
+            }
+            Expr::ScalarFunction { args, .. }
+            | Expr::ScalarUDF { args, .. }
+            | Expr::AggregateFunction { args, .. }
+            | Expr::AggregateUDF { args, .. } => args
+                .iter()
+                .try_fold(visitor, |visitor, arg| arg.accept(visitor)),
+            Expr::WindowFunction {
+                args,
+                partition_by,
+                order_by,
+                ..
+            } => {
+                let visitor = args
+                    .iter()
+                    .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
+                let visitor = partition_by
+                    .iter()
+                    .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
+                let visitor = order_by
+                    .iter()
+                    .try_fold(visitor, |visitor, arg| arg.accept(visitor))?;
+                Ok(visitor)
+            }
+            Expr::InList { expr, list, .. } => {
+                let visitor = expr.accept(visitor)?;
+                list.iter()
+                    .try_fold(visitor, |visitor, arg| arg.accept(visitor))
+            }
+        }?;
+
+        visitor.post_visit(self)
+    }
+}
+
+pub(crate) trait ExprSimplifiable: Sized {
+    fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self>;
+}
 
+impl ExprSimplifiable for Expr {
     /// Simplifies this [`Expr`]`s as much as possible, evaluating
     /// constants and applying algebraic simplifications
     ///
@@ -889,7 +907,7 @@ impl Expr {
     /// let expr = expr.simplify(&Info::default()).unwrap();
     /// assert_eq!(expr, b_lt_2);
     /// ```
-    pub fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self> {
+    fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self> {
         let mut rewriter = Simplifier::new(info);
         let mut const_evaluator = ConstEvaluator::new(info.execution_props());
 
diff --git a/datafusion/src/logical_plan/mod.rs b/datafusion/src/logical_plan/mod.rs
index ec1aea6..c67cef8 100644
--- a/datafusion/src/logical_plan/mod.rs
+++ b/datafusion/src/logical_plan/mod.rs
@@ -49,6 +49,7 @@ pub use expr::{
     Column, Expr, ExprRewriter, ExprSchema, ExpressionVisitor, Literal, Recursion,
     RewriteRecursion, SimplifyInfo,
 };
+pub(crate) use expr::{ExprRewritable, ExprSimplifiable, ExprVisitable};
 pub use extension::UserDefinedLogicalNode;
 pub use operators::Operator;
 pub use plan::{
diff --git a/datafusion/src/optimizer/common_subexpr_eliminate.rs b/datafusion/src/optimizer/common_subexpr_eliminate.rs
index 9470734..5c2219b 100644
--- a/datafusion/src/optimizer/common_subexpr_eliminate.rs
+++ b/datafusion/src/optimizer/common_subexpr_eliminate.rs
@@ -23,8 +23,8 @@ use crate::logical_plan::plan::{Filter, Projection, Window};
 use crate::logical_plan::{
     col,
     plan::{Aggregate, Sort},
-    DFField, DFSchema, Expr, ExprRewriter, ExpressionVisitor, LogicalPlan, Recursion,
-    RewriteRecursion,
+    DFField, DFSchema, Expr, ExprRewritable, ExprRewriter, ExprVisitable,
+    ExpressionVisitor, LogicalPlan, Recursion, RewriteRecursion,
 };
 use crate::optimizer::optimizer::OptimizerRule;
 use crate::optimizer::utils;
diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs
index 5f87542..2dbceb1 100644
--- a/datafusion/src/optimizer/simplify_expressions.rs
+++ b/datafusion/src/optimizer/simplify_expressions.rs
@@ -24,8 +24,8 @@ use arrow::record_batch::RecordBatch;
 use crate::error::DataFusionError;
 use crate::execution::context::ExecutionProps;
 use crate::logical_plan::{
-    lit, DFSchema, DFSchemaRef, Expr, ExprRewriter, LogicalPlan, RewriteRecursion,
-    SimplifyInfo,
+    lit, DFSchema, DFSchemaRef, Expr, ExprRewritable, ExprRewriter, ExprSimplifiable,
+    LogicalPlan, RewriteRecursion, SimplifyInfo,
 };
 use crate::optimizer::optimizer::OptimizerRule;
 use crate::optimizer::utils;
diff --git a/datafusion/src/optimizer/utils.rs b/datafusion/src/optimizer/utils.rs
index f7ab836..41d1e4b 100644
--- a/datafusion/src/optimizer/utils.rs
+++ b/datafusion/src/optimizer/utils.rs
@@ -22,9 +22,11 @@ use crate::execution::context::ExecutionProps;
 use crate::logical_plan::plan::{
     Aggregate, Analyze, Extension, Filter, Join, Projection, Sort, Window,
 };
+
 use crate::logical_plan::{
-    build_join_schema, Column, CreateMemoryTable, DFSchemaRef, Expr, Limit, LogicalPlan,
-    LogicalPlanBuilder, Operator, Partitioning, Recursion, Repartition, Union, Values,
+    build_join_schema, Column, CreateMemoryTable, DFSchemaRef, Expr, ExprVisitable,
+    Limit, LogicalPlan, LogicalPlanBuilder, Operator, Partitioning, Recursion,
+    Repartition, Union, Values,
 };
 use crate::prelude::lit;
 use crate::scalar::ScalarValue;
diff --git a/datafusion/src/sql/utils.rs b/datafusion/src/sql/utils.rs
index d0cef0f..cbe40d6 100644
--- a/datafusion/src/sql/utils.rs
+++ b/datafusion/src/sql/utils.rs
@@ -20,6 +20,7 @@
 use arrow::datatypes::DataType;
 use sqlparser::ast::Ident;
 
+use crate::logical_plan::ExprVisitable;
 use crate::logical_plan::{Expr, LogicalPlan};
 use crate::scalar::{ScalarValue, MAX_PRECISION_FOR_DECIMAL128};
 use crate::{