You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/10/12 20:10:48 UTC
[arrow-datafusion] branch master updated: MINOR: Optimizer example and docs, deprecate `Expr::name` (#3788)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 4cb8ac094 MINOR: Optimizer example and docs, deprecate `Expr::name` (#3788)
4cb8ac094 is described below
commit 4cb8ac094ee88dc0023b4cde1b39840a0a362ee0
Author: Andy Grove <an...@gmail.com>
AuthorDate: Wed Oct 12 14:10:41 2022 -0600
MINOR: Optimizer example and docs, deprecate `Expr::name` (#3788)
* Optimizer example and docs
* more docs
* clippy
* Add notes on expression naming
* debugging
* add display_name and deprecate name
* clippy
* Update datafusion/optimizer/README.md
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
* Update datafusion/optimizer/README.md
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
* Update datafusion/optimizer/README.md
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
* Update datafusion/optimizer/README.md
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
* move example
* move example
* toml fmt
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
datafusion-examples/Cargo.toml | 5 +
datafusion-examples/examples/rewrite_expr.rs | 163 +++++++++++
datafusion/expr/src/expr.rs | 13 +-
datafusion/expr/src/expr_schema.rs | 2 +-
datafusion/expr/src/logical_plan/builder.rs | 2 +-
datafusion/expr/src/utils.rs | 4 +-
datafusion/optimizer/README.md | 314 ++++++++++++++++++++-
.../optimizer/src/common_subexpr_eliminate.rs | 2 +-
datafusion/optimizer/src/filter_push_down.rs | 2 +-
datafusion/optimizer/src/optimizer.rs | 3 +-
datafusion/optimizer/src/projection_push_down.rs | 4 +-
datafusion/optimizer/src/simplify_expressions.rs | 4 +-
.../optimizer/src/single_distinct_to_groupby.rs | 4 +-
datafusion/optimizer/src/utils.rs | 10 +-
14 files changed, 508 insertions(+), 24 deletions(-)
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index 844bdba7a..2530f890f 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -34,9 +34,14 @@ path = "examples/avro_sql.rs"
required-features = ["datafusion/avro"]
[dev-dependencies]
+arrow = "24.0.0"
arrow-flight = "24.0.0"
async-trait = "0.1.41"
datafusion = { path = "../datafusion/core" }
+datafusion-common = { path = "../datafusion/common" }
+datafusion-expr = { path = "../datafusion/expr" }
+datafusion-optimizer = { path = "../datafusion/optimizer" }
+datafusion-sql = { path = "../datafusion/sql" }
futures = "0.3"
num_cpus = "1.13.0"
object_store = { version = "0.5.0", features = ["aws"] }
diff --git a/datafusion-examples/examples/rewrite_expr.rs b/datafusion-examples/examples/rewrite_expr.rs
new file mode 100644
index 000000000..5e93bfd73
--- /dev/null
+++ b/datafusion-examples/examples/rewrite_expr.rs
@@ -0,0 +1,163 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter};
+use datafusion_expr::{AggregateUDF, Expr, Filter, LogicalPlan, ScalarUDF, TableSource};
+use datafusion_optimizer::optimizer::Optimizer;
+use datafusion_optimizer::{utils, OptimizerConfig, OptimizerRule};
+use datafusion_sql::planner::{ContextProvider, SqlToRel};
+use datafusion_sql::sqlparser::dialect::PostgreSqlDialect;
+use datafusion_sql::sqlparser::parser::Parser;
+use datafusion_sql::TableReference;
+use std::any::Any;
+use std::sync::Arc;
+
+pub fn main() -> Result<()> {
+ // produce a logical plan using the datafusion-sql crate
+ let dialect = PostgreSqlDialect {};
+ let sql = "SELECT * FROM person WHERE age BETWEEN 21 AND 32";
+ let statements = Parser::parse_sql(&dialect, sql)?;
+
+ // produce a logical plan using the datafusion-sql crate
+ let context_provider = MyContextProvider {};
+ let sql_to_rel = SqlToRel::new(&context_provider);
+ let logical_plan = sql_to_rel.sql_statement_to_plan(statements[0].clone())?;
+ println!(
+ "Unoptimized Logical Plan:\n\n{}\n",
+ logical_plan.display_indent()
+ );
+
+ // now run the optimizer with our custom rule
+ let optimizer = Optimizer::with_rules(vec![Arc::new(MyRule {})]);
+ let mut optimizer_config = OptimizerConfig::default().with_skip_failing_rules(false);
+ let optimized_plan =
+ optimizer.optimize(&logical_plan, &mut optimizer_config, observe)?;
+ println!(
+ "Optimized Logical Plan:\n\n{}\n",
+ optimized_plan.display_indent()
+ );
+
+ Ok(())
+}
+
+fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) {
+ println!(
+ "After applying rule '{}':\n{}\n",
+ rule.name(),
+ plan.display_indent()
+ )
+}
+
+struct MyRule {}
+
+impl OptimizerRule for MyRule {
+ fn name(&self) -> &str {
+ "my_rule"
+ }
+
+ fn optimize(
+ &self,
+ plan: &LogicalPlan,
+ _config: &mut OptimizerConfig,
+ ) -> Result<LogicalPlan> {
+ // recurse down and optimize children first
+ let plan = utils::optimize_children(self, plan, _config)?;
+
+ match plan {
+ LogicalPlan::Filter(filter) => {
+ let mut expr_rewriter = MyExprRewriter {};
+ let predicate = filter.predicate().clone();
+ let predicate = predicate.rewrite(&mut expr_rewriter)?;
+ Ok(LogicalPlan::Filter(Filter::try_new(
+ predicate,
+ filter.input().clone(),
+ )?))
+ }
+ _ => Ok(plan.clone()),
+ }
+ }
+}
+
+struct MyExprRewriter {}
+
+impl ExprRewriter for MyExprRewriter {
+ fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+ match expr {
+ Expr::Between {
+ negated,
+ expr,
+ low,
+ high,
+ } => {
+ let expr: Expr = expr.as_ref().clone();
+ let low: Expr = low.as_ref().clone();
+ let high: Expr = high.as_ref().clone();
+ if negated {
+ Ok(expr.clone().lt(low).or(expr.gt(high)))
+ } else {
+ Ok(expr.clone().gt_eq(low).and(expr.lt_eq(high)))
+ }
+ }
+ _ => Ok(expr.clone()),
+ }
+ }
+}
+
+struct MyContextProvider {}
+
+impl ContextProvider for MyContextProvider {
+ fn get_table_provider(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
+ if name.table() == "person" {
+ Ok(Arc::new(MyTableSource {
+ schema: Arc::new(Schema::new(vec![
+ Field::new("name", DataType::Utf8, false),
+ Field::new("age", DataType::UInt8, false),
+ ])),
+ }))
+ } else {
+ Err(DataFusionError::Plan("table not found".to_string()))
+ }
+ }
+
+ fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
+ None
+ }
+
+ fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
+ None
+ }
+
+ fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
+ None
+ }
+}
+
+struct MyTableSource {
+ schema: SchemaRef,
+}
+
+impl TableSource for MyTableSource {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn schema(&self) -> SchemaRef {
+ self.schema.clone()
+ }
+}
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index c21dd5c88..6f683dc7f 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -367,10 +367,17 @@ impl PartialOrd for Expr {
impl Expr {
/// Returns the name of this expression as it should appear in a schema. This name
/// will not include any CAST expressions.
- pub fn name(&self) -> Result<String> {
+ pub fn display_name(&self) -> Result<String> {
create_name(self)
}
+ /// Returns the name of this expression as it should appear in a schema. This name
+ /// will not include any CAST expressions.
+ #[deprecated(since = "14.0.0", note = "please use `display_name` instead")]
+ pub fn name(&self) -> Result<String> {
+ self.display_name()
+ }
+
/// Returns a full and complete string representation of this expression.
pub fn canonical_name(&self) -> String {
format!("{}", self)
@@ -1186,7 +1193,7 @@ mod test {
assert_eq!(expected, expr.canonical_name());
assert_eq!(expected, format!("{}", expr));
assert_eq!(expected, format!("{:?}", expr));
- assert_eq!(expected, expr.name()?);
+ assert_eq!(expected, expr.display_name()?);
Ok(())
}
@@ -1202,7 +1209,7 @@ mod test {
assert_eq!(expected_canonical, format!("{:?}", expr));
// note that CAST intentionally has a name that is different from its `Display`
// representation. CAST does not change the name of expressions.
- assert_eq!("Float32(1.23)", expr.name()?);
+ assert_eq!("Float32(1.23)", expr.display_name()?);
Ok(())
}
diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs
index 5442a2421..cfcbcc940 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -240,7 +240,7 @@ impl ExprSchemable for Expr {
)),
_ => Ok(DFField::new(
None,
- &self.name()?,
+ &self.display_name()?,
self.get_type(input_schema)?,
self.nullable(input_schema)?,
)),
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 300c3b8cb..631a64da6 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -841,7 +841,7 @@ pub(crate) fn validate_unique_names<'a>(
) -> Result<()> {
let mut unique_names = HashMap::new();
expressions.into_iter().enumerate().try_for_each(|(position, expr)| {
- let name = expr.name()?;
+ let name = expr.display_name()?;
match unique_names.get(&name) {
None => {
unique_names.insert(name, (position, expr));
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 8e2544793..be4c45f8c 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -676,7 +676,7 @@ pub fn columnize_expr(e: Expr, input_schema: &DFSchema) -> Expr {
Expr::Alias(Box::new(columnize_expr(*inner_expr, input_schema)), name)
}
Expr::ScalarSubquery(_) => e.clone(),
- _ => match e.name() {
+ _ => match e.display_name() {
Ok(name) => match input_schema.field_with_unqualified_name(&name) {
Ok(field) => Expr::Column(field.qualified_column()),
// expression not provided as input, do not convert to a column reference
@@ -728,7 +728,7 @@ pub fn expr_as_column_expr(expr: &Expr, plan: &LogicalPlan) -> Result<Expr> {
let field = plan.schema().field_from_column(col)?;
Ok(Expr::Column(field.qualified_column()))
}
- _ => Ok(Expr::Column(Column::from_name(expr.name()?))),
+ _ => Ok(Expr::Column(Column::from_name(expr.display_name()?))),
}
}
diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md
index 39d28a8fa..b8edeea87 100644
--- a/datafusion/optimizer/README.md
+++ b/datafusion/optimizer/README.md
@@ -17,10 +17,318 @@
under the License.
-->
-# DataFusion Query Optimizer Rules
+# DataFusion Query Optimizer
-[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
+[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory
+format.
-This crate is a submodule of DataFusion that provides query optimizer rules.
+DataFusion has modular design, allowing individual crates to be re-used in other projects.
+
+This crate is a submodule of DataFusion that provides a query optimizer for logical plans, and
+contains an extensive set of OptimizerRules that may rewrite the plan and/or its expressions so
+they execute more quickly while still computing the same result.
+
+## Running the Optimizer
+
+The following code demonstrates the basic flow of creating the optimizer with a default set of optimization rules
+and applying it to a logical plan to produce an optimized logical plan.
+
+```rust
+
+// We need a logical plan as the starting point. There are many ways to build a logical plan:
+//
+// The `datafusion-expr` crate provides a LogicalPlanBuilder
+// The `datafusion-sql` crate provides a SQL query planner that can create a LogicalPlan from SQL
+// The `datafusion` crate provides a DataFrame API that can create a LogicalPlan
+let logical_plan = ...
+
+let mut config = OptimizerConfig::default();
+let optimizer = Optimizer::new(&config);
+let optimized_plan = optimizer.optimize(&logical_plan, &mut config, observe)?;
+
+fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) {
+ println!(
+ "After applying rule '{}':\n{}",
+ rule.name(),
+ plan.display_indent()
+ )
+}
+```
+
+## Providing Custom Rules
+
+The optimizer can be created with a custom set of rules.
+
+```rust
+let optimizer = Optimizer::with_rules(vec![
+ Arc::new(MyRule {})
+]);
+```
+
+## Writing Optimization Rules
+
+Please refer to the [rewrite_expr example](../../datafusion-examples/examples/rewrite_expr.rs) to learn more about
+the general approach to writing optimizer rules and then move onto studying the existing rules.
+
+All rules must implement the `OptimizerRule` trait.
+
+```rust
+/// `OptimizerRule` transforms one ['LogicalPlan'] into another which
+/// computes the same results, but in a potentially more efficient
+/// way. If there are no suitable transformations for the input plan,
+/// the optimizer can simply return it as is.
+pub trait OptimizerRule {
+ /// Rewrite `plan` to an optimized form
+ fn optimize(
+ &self,
+ plan: &LogicalPlan,
+ optimizer_config: &mut OptimizerConfig,
+ ) -> Result<LogicalPlan>;
+
+ /// A human readable name for this optimizer rule
+ fn name(&self) -> &str;
+}
+```
+
+### General Guidelines
+
+Rules typical walk the logical plan and walk the expression trees inside operators and selectively mutate
+individual operators or expressions.
+
+Sometimes there is an initial pass that visits the plan and builds state that is used in a second pass that performs
+the actual optimization. This approach is used in projection push down and filter push down.
+
+### Expression Naming
+
+Every expression in DataFusion has a name, which is used as the column name. For example, in this example the output
+contains a single column with the name `"COUNT(aggregate_test_100.c9)"`:
+
+```text
+❯ select count(c9) from aggregate_test_100;
++------------------------------+
+| COUNT(aggregate_test_100.c9) |
++------------------------------+
+| 100 |
++------------------------------+
+```
+
+These names are used to refer to the columns in both subqueries as well as internally from one stage of the LogicalPlan
+to another. For example:
+
+```text
+❯ select "COUNT(aggregate_test_100.c9)" + 1 from (select count(c9) from aggregate_test_100) as sq;
++--------------------------------------------+
+| sq.COUNT(aggregate_test_100.c9) + Int64(1) |
++--------------------------------------------+
+| 101 |
++--------------------------------------------+
+```
+
+### Implication
+
+Because DataFusion identifies columns using a string name, it means it is critical that the names of expressions are
+not changed by the optimizer when it rewrites expressions. This is typically accomplished by renaming a rewritten
+expression by adding an alias.
+
+Here is a simple example of such a rewrite. The expression `1 + 2` can be internally simplified to 3 but must still be
+displayed the same as `1 + 2`:
+
+```text
+❯ select 1 + 2;
++---------------------+
+| Int64(1) + Int64(2) |
++---------------------+
+| 3 |
++---------------------+
+```
+
+Looking at the `EXPLAIN` output we can see that the optimizer has effectively rewritten `1 + 2` into effectively
+`3 as "1 + 2"`:
+
+```text
+❯ explain select 1 + 2;
++---------------+-------------------------------------------------+
+| plan_type | plan |
++---------------+-------------------------------------------------+
+| logical_plan | Projection: Int64(3) AS Int64(1) + Int64(2) |
+| | EmptyRelation |
+| physical_plan | ProjectionExec: expr=[3 as Int64(1) + Int64(2)] |
+| | EmptyExec: produce_one_row=true |
+| | |
++---------------+-------------------------------------------------+
+```
+
+If the expression name is not preserved, bugs such as [#3704](https://github.com/apache/arrow-datafusion/issues/3704)
+and [#3555](https://github.com/apache/arrow-datafusion/issues/3555) occur where the expected columns can not be found.
+
+### Building Expression Names
+
+There are currently two ways to create a name for an expression in the logical plan.
+
+```rust
+impl Expr {
+ /// Returns the name of this expression as it should appear in a schema. This name
+ /// will not include any CAST expressions.
+ pub fn display_name(&self) -> Result<String> {
+ create_name(self)
+ }
+
+ /// Returns a full and complete string representation of this expression.
+ pub fn canonical_name(&self) -> String {
+ format!("{}", self)
+ }
+}
+```
+
+When comparing expressions to determine if they are equivalent, `canonical_name` should be used, and when creating a
+name to be used in a schema, `display_name` should be used.
+
+### Utilities
+
+There are a number of utility methods provided that take care of some common tasks.
+
+### ExprVisitor
+
+The `ExprVisitor` and `ExprVisitable` traits provide a mechanism for applying a visitor pattern to an expression tree.
+
+Here is an example that demonstrates this.
+
+```rust
+fn extract_subquery_filters(expression: &Expr, extracted: &mut Vec<Expr>) -> Result<()> {
+ struct InSubqueryVisitor<'a> {
+ accum: &'a mut Vec<Expr>,
+ }
+
+ impl ExpressionVisitor for InSubqueryVisitor<'_> {
+ fn pre_visit(self, expr: &Expr) -> Result<Recursion<Self>> {
+ if let Expr::InSubquery { .. } = expr {
+ self.accum.push(expr.to_owned());
+ }
+ Ok(Recursion::Continue(self))
+ }
+ }
+
+ expression.accept(InSubqueryVisitor { accum: extracted })?;
+ Ok(())
+}
+```
+
+### Rewriting Expressions
+
+The `MyExprRewriter` trait can be implemented to provide a way to rewrite expressions. This rule can then be applied
+to an expression by calling `Expr::rewrite` (from the `ExprRewritable` trait).
+
+The `rewrite` method will perform a depth first walk of the expression and its children to rewrite an expression,
+consuming `self` producing a new expression.
+
+```rust
+let mut expr_rewriter = MyExprRewriter {};
+let expr = expr.rewrite(&mut expr_rewriter)?;
+```
+
+Here is an example implementation which will rewrite `expr BETWEEN a AND b` as `expr >= a AND expr <= b`. Note that the
+implementation does not need to perform any recursion since this is handled by the `rewrite` method.
+
+```rust
+struct MyExprRewriter {}
+
+impl ExprRewriter for MyExprRewriter {
+ fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+ match expr {
+ Expr::Between {
+ negated,
+ expr,
+ low,
+ high,
+ } => {
+ let expr: Expr = expr.as_ref().clone();
+ let low: Expr = low.as_ref().clone();
+ let high: Expr = high.as_ref().clone();
+ if negated {
+ Ok(expr.clone().lt(low).or(expr.clone().gt(high)))
+ } else {
+ Ok(expr.clone().gt_eq(low).and(expr.clone().lt_eq(high)))
+ }
+ }
+ _ => Ok(expr.clone()),
+ }
+ }
+}
+```
+
+### optimize_children
+
+Typically a rule is applied recursively to all operators within a query plan. Rather than duplicate
+that logic in each rule, an `optimize_children` method is provided. This recursively invokes the `optimize` method on
+the plan's children and then returns a node of the same type.
+
+```rust
+fn optimize(
+ &self,
+ plan: &LogicalPlan,
+ _config: &mut OptimizerConfig,
+) -> Result<LogicalPlan> {
+ // recurse down and optimize children first
+ let plan = utils::optimize_children(self, plan, _config)?;
+
+ ...
+}
+```
+
+### Writing Tests
+
+There should be unit tests in the same file as the new rule that test the effect of the rule being applied to a plan
+in isolation (without any other rule being applied).
+
+There should also be a test in `integration-tests.rs` that tests the rule as part of the overall optimization process.
+
+### Debugging
+
+The `EXPLAIN VERBOSE` command can be used to show the effect of each optimization rule on a query.
+
+In the following example, the `type_coercion` and `simplify_expressions` passes have simplified the plan so that it returns the constant `"3.2"` rather than doing a computation at execution time.
+
+```text
+❯ explain verbose select cast(1 + 2.2 as string) as foo;
++------------------------------------------------------------+---------------------------------------------------------------------------+
+| plan_type | plan |
++------------------------------------------------------------+---------------------------------------------------------------------------+
+| initial_logical_plan | Projection: CAST(Int64(1) + Float64(2.2) AS Utf8) AS foo |
+| | EmptyRelation |
+| logical_plan after type_coercion | Projection: CAST(CAST(Int64(1) AS Float64) + Float64(2.2) AS Utf8) AS foo |
+| | EmptyRelation |
+| logical_plan after simplify_expressions | Projection: Utf8("3.2") AS foo |
+| | EmptyRelation |
+| logical_plan after unwrap_cast_in_comparison | SAME TEXT AS ABOVE |
+| logical_plan after decorrelate_where_exists | SAME TEXT AS ABOVE |
+| logical_plan after decorrelate_where_in | SAME TEXT AS ABOVE |
+| logical_plan after scalar_subquery_to_join | SAME TEXT AS ABOVE |
+| logical_plan after subquery_filter_to_join | SAME TEXT AS ABOVE |
+| logical_plan after simplify_expressions | SAME TEXT AS ABOVE |
+| logical_plan after eliminate_filter | SAME TEXT AS ABOVE |
+| logical_plan after reduce_cross_join | SAME TEXT AS ABOVE |
+| logical_plan after common_sub_expression_eliminate | SAME TEXT AS ABOVE |
+| logical_plan after eliminate_limit | SAME TEXT AS ABOVE |
+| logical_plan after projection_push_down | SAME TEXT AS ABOVE |
+| logical_plan after rewrite_disjunctive_predicate | SAME TEXT AS ABOVE |
+| logical_plan after reduce_outer_join | SAME TEXT AS ABOVE |
+| logical_plan after filter_push_down | SAME TEXT AS ABOVE |
+| logical_plan after limit_push_down | SAME TEXT AS ABOVE |
+| logical_plan after single_distinct_aggregation_to_group_by | SAME TEXT AS ABOVE |
+| logical_plan | Projection: Utf8("3.2") AS foo |
+| | EmptyRelation |
+| initial_physical_plan | ProjectionExec: expr=[3.2 as foo] |
+| | EmptyExec: produce_one_row=true |
+| | |
+| physical_plan after aggregate_statistics | SAME TEXT AS ABOVE |
+| physical_plan after hash_build_probe_order | SAME TEXT AS ABOVE |
+| physical_plan after coalesce_batches | SAME TEXT AS ABOVE |
+| physical_plan after repartition | SAME TEXT AS ABOVE |
+| physical_plan after add_merge_exec | SAME TEXT AS ABOVE |
+| physical_plan | ProjectionExec: expr=[3.2 as foo] |
+| | EmptyExec: produce_one_row=true |
+| | |
++------------------------------------------------------------+---------------------------------------------------------------------------+
+```
[df]: https://crates.io/crates/datafusion
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index d33fe07c0..c0edba96f 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -546,7 +546,7 @@ impl ExprRewriter for CommonSubexprRewriter<'_> {
self.curr_index += 1;
}
- let expr_name = expr.name()?;
+ let expr_name = expr.display_name()?;
// Alias this `Column` expr to it original "expr name",
// `projection_push_down` optimizer use "expr name" to eliminate useless
// projections.
diff --git a/datafusion/optimizer/src/filter_push_down.rs b/datafusion/optimizer/src/filter_push_down.rs
index d1f696621..22b6aa5a1 100644
--- a/datafusion/optimizer/src/filter_push_down.rs
+++ b/datafusion/optimizer/src/filter_push_down.rs
@@ -406,7 +406,7 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> {
let agg_columns = aggr_expr
.iter()
- .map(|x| Ok(Column::from_name(x.name()?)))
+ .map(|x| Ok(Column::from_name(x.display_name()?)))
.collect::<Result<HashSet<_>>>()?;
used_columns.extend(agg_columns);
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index 87e4d1ffc..5e61ccc5a 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -43,7 +43,8 @@ use std::sync::Arc;
/// `OptimizerRule` transforms one ['LogicalPlan'] into another which
/// computes the same results, but in a potentially more efficient
-/// way.
+/// way. If there are no suitable transformations for the input plan,
+/// the optimizer can simply return it as is.
pub trait OptimizerRule {
/// Rewrite `plan` to an optimized form
fn optimize(
diff --git a/datafusion/optimizer/src/projection_push_down.rs b/datafusion/optimizer/src/projection_push_down.rs
index 5a048aac7..9e61ccd6e 100644
--- a/datafusion/optimizer/src/projection_push_down.rs
+++ b/datafusion/optimizer/src/projection_push_down.rs
@@ -259,7 +259,7 @@ fn optimize_plan(
let mut new_window_expr = Vec::new();
{
window_expr.iter().try_for_each(|expr| {
- let name = &expr.name()?;
+ let name = &expr.display_name()?;
let column = Column::from_name(name);
if required_columns.contains(&column) {
new_window_expr.push(expr.clone());
@@ -317,7 +317,7 @@ fn optimize_plan(
// Gather all columns needed for expressions in this Aggregate
let mut new_aggr_expr = Vec::new();
aggr_expr.iter().try_for_each(|expr| {
- let name = &expr.name()?;
+ let name = &expr.display_name()?;
let column = Column::from_name(name);
if required_columns.contains(&column) {
new_aggr_expr.push(expr.clone());
diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs
index 7c203bdfe..ebb1be089 100644
--- a/datafusion/optimizer/src/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -310,12 +310,12 @@ impl SimplifyExpressions {
.map(|e| {
// We need to keep original expression name, if any.
// Constant folding should not change expression name.
- let name = &e.name();
+ let name = &e.display_name();
// Apply the actual simplification logic
let new_e = simplifier.simplify(e)?;
- let new_name = &new_e.name();
+ let new_name = &new_e.display_name();
if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) {
if expr_name != new_expr_name {
diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs
index 6d3343763..e6ec0e72f 100644
--- a/datafusion/optimizer/src/single_distinct_to_groupby.rs
+++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs
@@ -91,7 +91,7 @@ fn optimize(plan: &LogicalPlan) -> Result<LogicalPlan> {
fun, args, filter, ..
} => {
// is_single_distinct_agg ensure args.len=1
- if group_fields_set.insert(args[0].name()?) {
+ if group_fields_set.insert(args[0].display_name()?) {
inner_group_exprs
.push(args[0].clone().alias(SINGLE_DISTINCT_ALIAS));
}
@@ -189,7 +189,7 @@ fn is_single_distinct_agg(plan: &LogicalPlan) -> Result<bool> {
distinct_count += 1;
}
for e in args {
- fields_set.insert(e.name()?);
+ fields_set.insert(e.display_name()?);
}
}
}
diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs
index 23aec3354..bf1968587 100644
--- a/datafusion/optimizer/src/utils.rs
+++ b/datafusion/optimizer/src/utils.rs
@@ -389,7 +389,7 @@ where
fn name_for_alias(expr: &Expr) -> Result<String> {
match expr {
Expr::Sort { expr, .. } => name_for_alias(expr),
- expr => expr.name(),
+ expr => expr.display_name(),
}
}
@@ -565,14 +565,14 @@ mod tests {
let expr = rewrite_preserving_name(expr_from.clone(), &mut rewriter).unwrap();
let original_name = match &expr_from {
- Expr::Sort { expr, .. } => expr.name(),
- expr => expr.name(),
+ Expr::Sort { expr, .. } => expr.display_name(),
+ expr => expr.display_name(),
}
.unwrap();
let new_name = match &expr {
- Expr::Sort { expr, .. } => expr.name(),
- expr => expr.name(),
+ Expr::Sort { expr, .. } => expr.display_name(),
+ expr => expr.display_name(),
}
.unwrap();