You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/06/05 12:02:42 UTC
[arrow-datafusion] branch master updated: MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate (#2686)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 930653415 MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate (#2686)
930653415 is described below
commit 930653415c6303b34f856826ba73717b25b9574c
Author: Andy Grove <ag...@apache.org>
AuthorDate: Sun Jun 5 06:02:37 2022 -0600
MINOR: Move `simplify_expression` rule to `datafusion-optimizer` crate (#2686)
---
.github/workflows/dev_pr/labeler.yml | 2 +-
datafusion/core/src/lib.rs | 5 +-
datafusion/core/src/logical_plan/mod.rs | 3 +-
datafusion/core/src/optimizer/mod.rs | 28 -------
datafusion/optimizer/Cargo.toml | 1 +
.../src/expr_simplifier.rs} | 14 ++--
datafusion/optimizer/src/lib.rs | 2 +
.../src}/simplify_expressions.rs | 85 +++++++++++++---------
8 files changed, 66 insertions(+), 74 deletions(-)
diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml
index 3d010355b..90a4619d4 100644
--- a/.github/workflows/dev_pr/labeler.yml
+++ b/.github/workflows/dev_pr/labeler.yml
@@ -38,7 +38,7 @@ physical-expr:
- datafusion/physical-expr/**/*
optimizer:
- - datafusion/core/src/optimizer/**/*
+ - datafusion/optimizer/**/*
core:
- datafusion/core/**/*
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index 4b4722285..df863ebd1 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -213,7 +213,6 @@ pub mod datasource;
pub mod error;
pub mod execution;
pub mod logical_plan;
-pub mod optimizer;
pub mod physical_optimizer;
pub mod physical_plan;
pub mod prelude;
@@ -230,10 +229,10 @@ pub use parquet;
pub use datafusion_common as common;
pub use datafusion_data_access;
pub use datafusion_expr as logical_expr;
+pub use datafusion_optimizer as optimizer;
pub use datafusion_physical_expr as physical_expr;
-pub use datafusion_sql as sql;
-
pub use datafusion_row as row;
+pub use datafusion_sql as sql;
#[cfg(feature = "jit")]
pub use datafusion_jit as jit;
diff --git a/datafusion/core/src/logical_plan/mod.rs b/datafusion/core/src/logical_plan/mod.rs
index 0a9731441..eb6e788d5 100644
--- a/datafusion/core/src/logical_plan/mod.rs
+++ b/datafusion/core/src/logical_plan/mod.rs
@@ -22,7 +22,6 @@
//! physical query plans and executed.
mod expr;
-mod expr_simplier;
pub mod plan;
mod registry;
pub mod window_frames;
@@ -36,6 +35,7 @@ pub use datafusion_expr::{
},
ExprSchemable, Operator,
};
+pub use datafusion_optimizer::expr_simplifier::{ExprSimplifiable, SimplifyInfo};
pub use expr::{
abs, acos, and, approx_distinct, approx_percentile_cont, array, ascii, asin, atan,
avg, bit_length, btrim, call_fn, case, ceil, character_length, chr, coalesce, col,
@@ -54,7 +54,6 @@ pub use expr_rewriter::{
rewrite_sort_cols_by_aggs, unnormalize_col, unnormalize_cols, ExprRewritable,
ExprRewriter, RewriteRecursion,
};
-pub use expr_simplier::{ExprSimplifiable, SimplifyInfo};
pub use plan::{provider_as_source, source_as_provider};
pub use plan::{
CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateMemoryTable,
diff --git a/datafusion/core/src/optimizer/mod.rs b/datafusion/core/src/optimizer/mod.rs
deleted file mode 100644
index cf6412db9..000000000
--- a/datafusion/core/src/optimizer/mod.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module contains a query optimizer that operates against a logical plan and applies
-//! some simple rules to a logical plan, such as "Projection Push Down" and "Type Coercion".
-
-#![allow(clippy::module_inception)]
-pub mod simplify_expressions;
-
-pub use datafusion_optimizer::{
- common_subexpr_eliminate, eliminate_filter, eliminate_limit, filter_push_down,
- limit_push_down, optimizer, projection_push_down, single_distinct_to_groupby,
- subquery_filter_to_join, utils,
-};
diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml
index 4d024f4c5..1c9d4cc24 100644
--- a/datafusion/optimizer/Cargo.toml
+++ b/datafusion/optimizer/Cargo.toml
@@ -42,5 +42,6 @@ async-trait = "0.1.41"
chrono = { version = "0.4", default-features = false }
datafusion-common = { path = "../common", version = "8.0.0" }
datafusion-expr = { path = "../expr", version = "8.0.0" }
+datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" }
hashbrown = { version = "0.12", features = ["raw"] }
log = "^0.4"
diff --git a/datafusion/core/src/logical_plan/expr_simplier.rs b/datafusion/optimizer/src/expr_simplifier.rs
similarity index 88%
rename from datafusion/core/src/logical_plan/expr_simplier.rs
rename to datafusion/optimizer/src/expr_simplifier.rs
index f5dc727f2..d71ecdaa2 100644
--- a/datafusion/core/src/logical_plan/expr_simplier.rs
+++ b/datafusion/optimizer/src/expr_simplifier.rs
@@ -17,11 +17,10 @@
//! Expression simplifier
-use super::Expr;
-use super::ExprRewritable;
-use crate::execution::context::ExecutionProps;
-use crate::optimizer::simplify_expressions::{ConstEvaluator, Simplifier};
+use crate::simplify_expressions::{ConstEvaluator, Simplifier};
use datafusion_common::Result;
+use datafusion_expr::{expr_rewriter::ExprRewritable, Expr};
+use datafusion_physical_expr::execution_props::ExecutionProps;
#[allow(rustdoc::private_intra_doc_links)]
/// The information necessary to apply algebraic simplification to an
@@ -54,9 +53,10 @@ impl ExprSimplifiable for Expr {
/// `b > 2`
///
/// ```
- /// use datafusion::logical_plan::*;
- /// use datafusion::error::Result;
- /// use datafusion::execution::context::ExecutionProps;
+ /// use datafusion_expr::{col, lit, Expr};
+ /// use datafusion_common::Result;
+ /// use datafusion_physical_expr::execution_props::ExecutionProps;
+ /// use datafusion_optimizer::expr_simplifier::{SimplifyInfo, ExprSimplifiable};
///
/// /// Simple implementation that provides `Simplifier` the information it needs
/// #[derive(Default)]
diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs
index 9a5130050..cde355073 100644
--- a/datafusion/optimizer/src/lib.rs
+++ b/datafusion/optimizer/src/lib.rs
@@ -18,10 +18,12 @@
pub mod common_subexpr_eliminate;
pub mod eliminate_filter;
pub mod eliminate_limit;
+pub mod expr_simplifier;
pub mod filter_push_down;
pub mod limit_push_down;
pub mod optimizer;
pub mod projection_push_down;
+pub mod simplify_expressions;
pub mod single_distinct_to_groupby;
pub mod subquery_filter_to_join;
pub mod utils;
diff --git a/datafusion/core/src/optimizer/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs
similarity index 96%
rename from datafusion/core/src/optimizer/simplify_expressions.rs
rename to datafusion/optimizer/src/simplify_expressions.rs
index 162da4894..9be9bfde8 100644
--- a/datafusion/core/src/optimizer/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -17,9 +17,8 @@
//! Simplify expressions optimizer rule
-use crate::execution::context::ExecutionProps;
-use crate::logical_plan::{ExprSimplifiable, SimplifyInfo};
-use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use crate::expr_simplifier::ExprSimplifiable;
+use crate::{expr_simplifier::SimplifyInfo, OptimizerConfig, OptimizerRule};
use arrow::array::new_null_array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
@@ -30,9 +29,9 @@ use datafusion_expr::{
lit,
logical_plan::LogicalPlan,
utils::from_plan,
- Expr, ExprSchemable, Operator, Volatility,
+ ColumnarValue, Expr, ExprSchemable, Operator, Volatility,
};
-use datafusion_physical_expr::create_physical_expr;
+use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};
/// Provides simplification information based on schema and properties
pub(crate) struct SimplifyContext<'a, 'b> {
@@ -95,7 +94,7 @@ impl<'a, 'b> SimplifyInfo for SimplifyContext<'a, 'b> {
/// `Filter: b > 2`
///
#[derive(Default)]
-pub(crate) struct SimplifyExpressions {}
+pub struct SimplifyExpressions {}
/// returns true if `needle` is found in a chain of search_op
/// expressions. Such as: (A AND B) AND C
@@ -265,13 +264,13 @@ impl SimplifyExpressions {
/// --> `a`, which is handled by [`Simplifier`]
///
/// ```
-/// # use datafusion::prelude::*;
-/// # use datafusion::logical_plan::ExprRewritable;
-/// # use datafusion::optimizer::simplify_expressions::ConstEvaluator;
-/// # use datafusion::execution::context::ExecutionProps;
+/// # use datafusion_expr::{col, lit};
+/// # use datafusion_optimizer::simplify_expressions::ConstEvaluator;
+/// # use datafusion_physical_expr::execution_props::ExecutionProps;
+/// # use datafusion_expr::expr_rewriter::ExprRewritable;
///
-/// let optimizer_config = ExecutionProps::new();
-/// let mut const_evaluator = ConstEvaluator::new(&optimizer_config);
+/// let execution_props = ExecutionProps::new();
+/// let mut const_evaluator = ConstEvaluator::new(&execution_props);
///
/// // (1 + 2) + a
/// let expr = (lit(1) + lit(2)) + col("a");
@@ -295,7 +294,7 @@ pub struct ConstEvaluator<'a> {
/// descendants) so this Expr can be evaluated
can_evaluate: Vec<bool>,
- optimizer_config: &'a ExecutionProps,
+ execution_props: &'a ExecutionProps,
input_schema: DFSchema,
input_batch: RecordBatch,
}
@@ -341,8 +340,8 @@ impl<'a> ExprRewriter for ConstEvaluator<'a> {
impl<'a> ConstEvaluator<'a> {
/// Create a new `ConstantEvaluator`. Session constants (such as
/// the time for `now()` are taken from the passed
- /// `optimizer_config`.
- pub fn new(optimizer_config: &'a ExecutionProps) -> Self {
+ /// `execution_props`.
+ pub fn new(execution_props: &'a ExecutionProps) -> Self {
let input_schema = DFSchema::empty();
// The dummy column name is unused and doesn't matter as only
@@ -357,7 +356,7 @@ impl<'a> ConstEvaluator<'a> {
Self {
can_evaluate: vec![],
- optimizer_config,
+ execution_props,
input_schema,
input_batch,
}
@@ -423,11 +422,11 @@ impl<'a> ConstEvaluator<'a> {
&expr,
&self.input_schema,
&self.input_batch.schema(),
- self.optimizer_config,
+ self.execution_props,
)?;
let col_val = phys_expr.evaluate(&self.input_batch)?;
match col_val {
- crate::physical_plan::ColumnarValue::Array(a) => {
+ ColumnarValue::Array(a) => {
if a.len() != 1 {
Err(DataFusionError::Execution(format!(
"Could not evaluate the expressison, found a result of length {}",
@@ -437,7 +436,7 @@ impl<'a> ConstEvaluator<'a> {
Ok(ScalarValue::try_from_array(&a, 0)?)
}
}
- crate::physical_plan::ColumnarValue::Scalar(s) => Ok(s),
+ ColumnarValue::Scalar(s) => Ok(s),
}
}
}
@@ -745,22 +744,42 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> {
}
}
+/// A macro to assert that one string is contained within another with
+/// a nice error message if they are not.
+///
+/// Usage: `assert_contains!(actual, expected)`
+///
+/// Is a macro so test error
+/// messages are on the same line as the failure;
+///
+/// Both arguments must be convertable into Strings (Into<String>)
+#[macro_export]
+macro_rules! assert_contains {
+ ($ACTUAL: expr, $EXPECTED: expr) => {
+ let actual_value: String = $ACTUAL.into();
+ let expected_value: String = $EXPECTED.into();
+ assert!(
+ actual_value.contains(&expected_value),
+ "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}",
+ expected_value,
+ actual_value
+ );
+ };
+}
+
#[cfg(test)]
mod tests {
use super::*;
- use crate::assert_contains;
- use crate::logical_plan::{call_fn, create_udf};
- use crate::physical_plan::functions::make_scalar_function;
- use crate::physical_plan::udf::ScalarUDF;
- use crate::test_util::scan_empty;
use arrow::array::{ArrayRef, Int32Array};
use chrono::{DateTime, TimeZone, Utc};
use datafusion_common::DFField;
+ use datafusion_expr::logical_plan::table_scan;
use datafusion_expr::{
- and, binary_expr, col, lit, lit_timestamp_nano,
+ and, binary_expr, call_fn, col, create_udf, lit, lit_timestamp_nano,
logical_plan::builder::LogicalPlanBuilder, BuiltinScalarFunction, Expr,
- ExprSchemable,
+ ExprSchemable, ScalarUDF,
};
+ use datafusion_physical_expr::functions::make_scalar_function;
use std::collections::HashMap;
use std::sync::Arc;
@@ -1192,12 +1211,12 @@ mod tests {
expected_expr: Expr,
date_time: &DateTime<Utc>,
) {
- let optimizer_config = ExecutionProps {
+ let execution_props = ExecutionProps {
query_execution_start_time: *date_time,
var_providers: None,
};
- let mut const_evaluator = ConstEvaluator::new(&optimizer_config);
+ let mut const_evaluator = ConstEvaluator::new(&execution_props);
let evaluated_expr = input_expr
.clone()
.rewrite(&mut const_evaluator)
@@ -1220,8 +1239,8 @@ mod tests {
fn simplify(expr: Expr) -> Expr {
let schema = expr_test_schema();
- let optimizer_config = ExecutionProps::new();
- let info = SimplifyContext::new(vec![&schema], &optimizer_config);
+ let execution_props = ExecutionProps::new();
+ let info = SimplifyContext::new(vec![&schema], &execution_props);
expr.simplify(&info).unwrap()
}
@@ -1522,7 +1541,7 @@ mod tests {
Field::new("c", DataType::Boolean, false),
Field::new("d", DataType::UInt32, false),
]);
- scan_empty(Some("test"), &schema, None)
+ table_scan(Some("test"), &schema, None)
.expect("creating scan")
.build()
.expect("building plan")
@@ -1710,8 +1729,8 @@ mod tests {
.aggregate(
vec![col("a"), col("c")],
vec![
- crate::logical_plan::max(col("b").eq(lit(true))),
- crate::logical_plan::min(col("b")),
+ datafusion_expr::max(col("b").eq(lit(true))),
+ datafusion_expr::min(col("b")),
],
)
.unwrap()