You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/06/05 12:02:42 UTC

[arrow-datafusion] branch master updated: MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate (#2686)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 930653415 MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate (#2686)
930653415 is described below

commit 930653415c6303b34f856826ba73717b25b9574c
Author: Andy Grove <ag...@apache.org>
AuthorDate: Sun Jun 5 06:02:37 2022 -0600

    MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate (#2686)
---
 .github/workflows/dev_pr/labeler.yml               |  2 +-
 datafusion/core/src/lib.rs                         |  5 +-
 datafusion/core/src/logical_plan/mod.rs            |  3 +-
 datafusion/core/src/optimizer/mod.rs               | 28 -------
 datafusion/optimizer/Cargo.toml                    |  1 +
 .../src/expr_simplifier.rs}                        | 14 ++--
 datafusion/optimizer/src/lib.rs                    |  2 +
 .../src}/simplify_expressions.rs                   | 85 +++++++++++++---------
 8 files changed, 66 insertions(+), 74 deletions(-)

diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml
index 3d010355b..90a4619d4 100644
--- a/.github/workflows/dev_pr/labeler.yml
+++ b/.github/workflows/dev_pr/labeler.yml
@@ -38,7 +38,7 @@ physical-expr:
   - datafusion/physical-expr/**/*
 
 optimizer:
-  - datafusion/core/src/optimizer/**/*
+  - datafusion/optimizer/**/*
 
 core:
   - datafusion/core/**/*
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index 4b4722285..df863ebd1 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -213,7 +213,6 @@ pub mod datasource;
 pub mod error;
 pub mod execution;
 pub mod logical_plan;
-pub mod optimizer;
 pub mod physical_optimizer;
 pub mod physical_plan;
 pub mod prelude;
@@ -230,10 +229,10 @@ pub use parquet;
 pub use datafusion_common as common;
 pub use datafusion_data_access;
 pub use datafusion_expr as logical_expr;
+pub use datafusion_optimizer as optimizer;
 pub use datafusion_physical_expr as physical_expr;
-pub use datafusion_sql as sql;
-
 pub use datafusion_row as row;
+pub use datafusion_sql as sql;
 
 #[cfg(feature = "jit")]
 pub use datafusion_jit as jit;
diff --git a/datafusion/core/src/logical_plan/mod.rs b/datafusion/core/src/logical_plan/mod.rs
index 0a9731441..eb6e788d5 100644
--- a/datafusion/core/src/logical_plan/mod.rs
+++ b/datafusion/core/src/logical_plan/mod.rs
@@ -22,7 +22,6 @@
 //! physical query plans and executed.
 
 mod expr;
-mod expr_simplier;
 pub mod plan;
 mod registry;
 pub mod window_frames;
@@ -36,6 +35,7 @@ pub use datafusion_expr::{
     },
     ExprSchemable, Operator,
 };
+pub use datafusion_optimizer::expr_simplifier::{ExprSimplifiable, SimplifyInfo};
 pub use expr::{
     abs, acos, and, approx_distinct, approx_percentile_cont, array, ascii, asin, atan,
     avg, bit_length, btrim, call_fn, case, ceil, character_length, chr, coalesce, col,
@@ -54,7 +54,6 @@ pub use expr_rewriter::{
     rewrite_sort_cols_by_aggs, unnormalize_col, unnormalize_cols, ExprRewritable,
     ExprRewriter, RewriteRecursion,
 };
-pub use expr_simplier::{ExprSimplifiable, SimplifyInfo};
 pub use plan::{provider_as_source, source_as_provider};
 pub use plan::{
     CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateMemoryTable,
diff --git a/datafusion/core/src/optimizer/mod.rs b/datafusion/core/src/optimizer/mod.rs
deleted file mode 100644
index cf6412db9..000000000
--- a/datafusion/core/src/optimizer/mod.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains a query optimizer that operates against a logical plan and applies
-//! some simple rules to a logical plan, such as "Projection Push Down" and "Type Coercion".
-
-#![allow(clippy::module_inception)]
-pub mod simplify_expressions;
-
-pub use datafusion_optimizer::{
-    common_subexpr_eliminate, eliminate_filter, eliminate_limit, filter_push_down,
-    limit_push_down, optimizer, projection_push_down, single_distinct_to_groupby,
-    subquery_filter_to_join, utils,
-};
diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml
index 4d024f4c5..1c9d4cc24 100644
--- a/datafusion/optimizer/Cargo.toml
+++ b/datafusion/optimizer/Cargo.toml
@@ -42,5 +42,6 @@ async-trait = "0.1.41"
 chrono = { version = "0.4", default-features = false }
 datafusion-common = { path = "../common", version = "8.0.0" }
 datafusion-expr = { path = "../expr", version = "8.0.0" }
+datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" }
 hashbrown = { version = "0.12", features = ["raw"] }
 log = "^0.4"
diff --git a/datafusion/core/src/logical_plan/expr_simplier.rs b/datafusion/optimizer/src/expr_simplifier.rs
similarity index 88%
rename from datafusion/core/src/logical_plan/expr_simplier.rs
rename to datafusion/optimizer/src/expr_simplifier.rs
index f5dc727f2..d71ecdaa2 100644
--- a/datafusion/core/src/logical_plan/expr_simplier.rs
+++ b/datafusion/optimizer/src/expr_simplifier.rs
@@ -17,11 +17,10 @@
 
 //! Expression simplifier
 
-use super::Expr;
-use super::ExprRewritable;
-use crate::execution::context::ExecutionProps;
-use crate::optimizer::simplify_expressions::{ConstEvaluator, Simplifier};
+use crate::simplify_expressions::{ConstEvaluator, Simplifier};
 use datafusion_common::Result;
+use datafusion_expr::{expr_rewriter::ExprRewritable, Expr};
+use datafusion_physical_expr::execution_props::ExecutionProps;
 
 #[allow(rustdoc::private_intra_doc_links)]
 /// The information necessary to apply algebraic simplification to an
@@ -54,9 +53,10 @@ impl ExprSimplifiable for Expr {
     /// `b > 2`
     ///
     /// ```
-    /// use datafusion::logical_plan::*;
-    /// use datafusion::error::Result;
-    /// use datafusion::execution::context::ExecutionProps;
+    /// use datafusion_expr::{col, lit, Expr};
+    /// use datafusion_common::Result;
+    /// use datafusion_physical_expr::execution_props::ExecutionProps;
+    /// use datafusion_optimizer::expr_simplifier::{SimplifyInfo, ExprSimplifiable};
     ///
     /// /// Simple implementation that provides `Simplifier` the information it needs
     /// #[derive(Default)]
diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs
index 9a5130050..cde355073 100644
--- a/datafusion/optimizer/src/lib.rs
+++ b/datafusion/optimizer/src/lib.rs
@@ -18,10 +18,12 @@
 pub mod common_subexpr_eliminate;
 pub mod eliminate_filter;
 pub mod eliminate_limit;
+pub mod expr_simplifier;
 pub mod filter_push_down;
 pub mod limit_push_down;
 pub mod optimizer;
 pub mod projection_push_down;
+pub mod simplify_expressions;
 pub mod single_distinct_to_groupby;
 pub mod subquery_filter_to_join;
 pub mod utils;
diff --git a/datafusion/core/src/optimizer/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs
similarity index 96%
rename from datafusion/core/src/optimizer/simplify_expressions.rs
rename to datafusion/optimizer/src/simplify_expressions.rs
index 162da4894..9be9bfde8 100644
--- a/datafusion/core/src/optimizer/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -17,9 +17,8 @@
 
 //! Simplify expressions optimizer rule
 
-use crate::execution::context::ExecutionProps;
-use crate::logical_plan::{ExprSimplifiable, SimplifyInfo};
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::expr_simplifier::ExprSimplifiable;
+use crate::{expr_simplifier::SimplifyInfo, OptimizerConfig, OptimizerRule};
 use arrow::array::new_null_array;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow::record_batch::RecordBatch;
@@ -30,9 +29,9 @@ use datafusion_expr::{
     lit,
     logical_plan::LogicalPlan,
     utils::from_plan,
-    Expr, ExprSchemable, Operator, Volatility,
+    ColumnarValue, Expr, ExprSchemable, Operator, Volatility,
 };
-use datafusion_physical_expr::create_physical_expr;
+use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
 
 /// Provides simplification information based on schema and properties
 pub(crate) struct SimplifyContext<'a, 'b> {
@@ -95,7 +94,7 @@ impl<'a, 'b> SimplifyInfo for SimplifyContext<'a, 'b> {
 /// `Filter: b > 2`
 ///
 #[derive(Default)]
-pub(crate) struct SimplifyExpressions {}
+pub struct SimplifyExpressions {}
 
 /// returns true if `needle` is found in a chain of search_op
 /// expressions. Such as: (A AND B) AND C
@@ -265,13 +264,13 @@ impl SimplifyExpressions {
 /// --> `a`, which is handled by [`Simplifier`]
 ///
 /// ```
-/// # use datafusion::prelude::*;
-/// # use datafusion::logical_plan::ExprRewritable;
-/// # use datafusion::optimizer::simplify_expressions::ConstEvaluator;
-/// # use datafusion::execution::context::ExecutionProps;
+/// # use datafusion_expr::{col, lit};
+/// # use datafusion_optimizer::simplify_expressions::ConstEvaluator;
+/// # use datafusion_physical_expr::execution_props::ExecutionProps;
+/// # use datafusion_expr::expr_rewriter::ExprRewritable;
 ///
-/// let optimizer_config = ExecutionProps::new();
-/// let mut const_evaluator = ConstEvaluator::new(&optimizer_config);
+/// let execution_props = ExecutionProps::new();
+/// let mut const_evaluator = ConstEvaluator::new(&execution_props);
 ///
 /// // (1 + 2) + a
 /// let expr = (lit(1) + lit(2)) + col("a");
@@ -295,7 +294,7 @@ pub struct ConstEvaluator<'a> {
     /// descendants) so this Expr can be evaluated
     can_evaluate: Vec<bool>,
 
-    optimizer_config: &'a ExecutionProps,
+    execution_props: &'a ExecutionProps,
     input_schema: DFSchema,
     input_batch: RecordBatch,
 }
@@ -341,8 +340,8 @@ impl<'a> ExprRewriter for ConstEvaluator<'a> {
 impl<'a> ConstEvaluator<'a> {
     /// Create a new `ConstantEvaluator`. Session constants (such as
     /// the time for `now()` are taken from the passed
-    /// `optimizer_config`.
-    pub fn new(optimizer_config: &'a ExecutionProps) -> Self {
+    /// `execution_props`.
+    pub fn new(execution_props: &'a ExecutionProps) -> Self {
         let input_schema = DFSchema::empty();
 
         // The dummy column name is unused and doesn't matter as only
@@ -357,7 +356,7 @@ impl<'a> ConstEvaluator<'a> {
 
         Self {
             can_evaluate: vec![],
-            optimizer_config,
+            execution_props,
             input_schema,
             input_batch,
         }
@@ -423,11 +422,11 @@ impl<'a> ConstEvaluator<'a> {
             &expr,
             &self.input_schema,
             &self.input_batch.schema(),
-            self.optimizer_config,
+            self.execution_props,
         )?;
         let col_val = phys_expr.evaluate(&self.input_batch)?;
         match col_val {
-            crate::physical_plan::ColumnarValue::Array(a) => {
+            ColumnarValue::Array(a) => {
                 if a.len() != 1 {
                     Err(DataFusionError::Execution(format!(
                         "Could not evaluate the expressison, found a result of length {}",
@@ -437,7 +436,7 @@ impl<'a> ConstEvaluator<'a> {
                     Ok(ScalarValue::try_from_array(&a, 0)?)
                 }
             }
-            crate::physical_plan::ColumnarValue::Scalar(s) => Ok(s),
+            ColumnarValue::Scalar(s) => Ok(s),
         }
     }
 }
@@ -745,22 +744,42 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> {
     }
 }
 
+/// A macro to assert that one string is contained within another with
+/// a nice error message if they are not.
+///
+/// Usage: `assert_contains!(actual, expected)`
+///
+/// Is a macro so test error
+/// messages are on the same line as the failure;
+///
+/// Both arguments must be convertable into Strings (Into<String>)
+#[macro_export]
+macro_rules! assert_contains {
+    ($ACTUAL: expr, $EXPECTED: expr) => {
+        let actual_value: String = $ACTUAL.into();
+        let expected_value: String = $EXPECTED.into();
+        assert!(
+            actual_value.contains(&expected_value),
+            "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}",
+            expected_value,
+            actual_value
+        );
+    };
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::assert_contains;
-    use crate::logical_plan::{call_fn, create_udf};
-    use crate::physical_plan::functions::make_scalar_function;
-    use crate::physical_plan::udf::ScalarUDF;
-    use crate::test_util::scan_empty;
     use arrow::array::{ArrayRef, Int32Array};
     use chrono::{DateTime, TimeZone, Utc};
     use datafusion_common::DFField;
+    use datafusion_expr::logical_plan::table_scan;
     use datafusion_expr::{
-        and, binary_expr, col, lit, lit_timestamp_nano,
+        and, binary_expr, call_fn, col, create_udf, lit, lit_timestamp_nano,
         logical_plan::builder::LogicalPlanBuilder, BuiltinScalarFunction, Expr,
-        ExprSchemable,
+        ExprSchemable, ScalarUDF,
     };
+    use datafusion_physical_expr::functions::make_scalar_function;
     use std::collections::HashMap;
     use std::sync::Arc;
 
@@ -1192,12 +1211,12 @@ mod tests {
         expected_expr: Expr,
         date_time: &DateTime<Utc>,
     ) {
-        let optimizer_config = ExecutionProps {
+        let execution_props = ExecutionProps {
             query_execution_start_time: *date_time,
             var_providers: None,
         };
 
-        let mut const_evaluator = ConstEvaluator::new(&optimizer_config);
+        let mut const_evaluator = ConstEvaluator::new(&execution_props);
         let evaluated_expr = input_expr
             .clone()
             .rewrite(&mut const_evaluator)
@@ -1220,8 +1239,8 @@ mod tests {
 
     fn simplify(expr: Expr) -> Expr {
         let schema = expr_test_schema();
-        let optimizer_config = ExecutionProps::new();
-        let info = SimplifyContext::new(vec![&schema], &optimizer_config);
+        let execution_props = ExecutionProps::new();
+        let info = SimplifyContext::new(vec![&schema], &execution_props);
         expr.simplify(&info).unwrap()
     }
 
@@ -1522,7 +1541,7 @@ mod tests {
             Field::new("c", DataType::Boolean, false),
             Field::new("d", DataType::UInt32, false),
         ]);
-        scan_empty(Some("test"), &schema, None)
+        table_scan(Some("test"), &schema, None)
             .expect("creating scan")
             .build()
             .expect("building plan")
@@ -1710,8 +1729,8 @@ mod tests {
             .aggregate(
                 vec![col("a"), col("c")],
                 vec![
-                    crate::logical_plan::max(col("b").eq(lit(true))),
-                    crate::logical_plan::min(col("b")),
+                    datafusion_expr::max(col("b").eq(lit(true))),
+                    datafusion_expr::min(col("b")),
                 ],
             )
             .unwrap()