You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/10/12 20:10:48 UTC

[arrow-datafusion] branch master updated: MINOR: Optimizer example and docs, deprecate `Expr::name` (#3788)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 4cb8ac094 MINOR: Optimizer example and docs, deprecate `Expr::name` (#3788)
4cb8ac094 is described below

commit 4cb8ac094ee88dc0023b4cde1b39840a0a362ee0
Author: Andy Grove <an...@gmail.com>
AuthorDate: Wed Oct 12 14:10:41 2022 -0600

    MINOR: Optimizer example and docs, deprecate `Expr::name` (#3788)
    
    * Optimizer example and docs
    
    * more docs
    
    * clippy
    
    * Add notes on expression naming
    
    * debugging
    
    * add display_name and deprecate name
    
    * clippy
    
    * Update datafusion/optimizer/README.md
    
    Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
    
    * Update datafusion/optimizer/README.md
    
    Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
    
    * Update datafusion/optimizer/README.md
    
    Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
    
    * Update datafusion/optimizer/README.md
    
    Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
    
    * move example
    
    * move example
    
    * toml fmt
    
    Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
 datafusion-examples/Cargo.toml                     |   5 +
 datafusion-examples/examples/rewrite_expr.rs       | 163 +++++++++++
 datafusion/expr/src/expr.rs                        |  13 +-
 datafusion/expr/src/expr_schema.rs                 |   2 +-
 datafusion/expr/src/logical_plan/builder.rs        |   2 +-
 datafusion/expr/src/utils.rs                       |   4 +-
 datafusion/optimizer/README.md                     | 314 ++++++++++++++++++++-
 .../optimizer/src/common_subexpr_eliminate.rs      |   2 +-
 datafusion/optimizer/src/filter_push_down.rs       |   2 +-
 datafusion/optimizer/src/optimizer.rs              |   3 +-
 datafusion/optimizer/src/projection_push_down.rs   |   4 +-
 datafusion/optimizer/src/simplify_expressions.rs   |   4 +-
 .../optimizer/src/single_distinct_to_groupby.rs    |   4 +-
 datafusion/optimizer/src/utils.rs                  |  10 +-
 14 files changed, 508 insertions(+), 24 deletions(-)

diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index 844bdba7a..2530f890f 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -34,9 +34,14 @@ path = "examples/avro_sql.rs"
 required-features = ["datafusion/avro"]
 
 [dev-dependencies]
+arrow = "24.0.0"
 arrow-flight = "24.0.0"
 async-trait = "0.1.41"
 datafusion = { path = "../datafusion/core" }
+datafusion-common = { path = "../datafusion/common" }
+datafusion-expr = { path = "../datafusion/expr" }
+datafusion-optimizer = { path = "../datafusion/optimizer" }
+datafusion-sql = { path = "../datafusion/sql" }
 futures = "0.3"
 num_cpus = "1.13.0"
 object_store = { version = "0.5.0", features = ["aws"] }
diff --git a/datafusion-examples/examples/rewrite_expr.rs b/datafusion-examples/examples/rewrite_expr.rs
new file mode 100644
index 000000000..5e93bfd73
--- /dev/null
+++ b/datafusion-examples/examples/rewrite_expr.rs
@@ -0,0 +1,163 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter};
+use datafusion_expr::{AggregateUDF, Expr, Filter, LogicalPlan, ScalarUDF, TableSource};
+use datafusion_optimizer::optimizer::Optimizer;
+use datafusion_optimizer::{utils, OptimizerConfig, OptimizerRule};
+use datafusion_sql::planner::{ContextProvider, SqlToRel};
+use datafusion_sql::sqlparser::dialect::PostgreSqlDialect;
+use datafusion_sql::sqlparser::parser::Parser;
+use datafusion_sql::TableReference;
+use std::any::Any;
+use std::sync::Arc;
+
+pub fn main() -> Result<()> {
+    // parse the SQL text into statements using the PostgreSQL dialect
+    let dialect = PostgreSqlDialect {};
+    let sql = "SELECT * FROM person WHERE age BETWEEN 21 AND 32";
+    let statements = Parser::parse_sql(&dialect, sql)?;
+
+    // produce a logical plan using the datafusion-sql crate
+    let context_provider = MyContextProvider {};
+    let sql_to_rel = SqlToRel::new(&context_provider);
+    let logical_plan = sql_to_rel.sql_statement_to_plan(statements[0].clone())?;
+    println!(
+        "Unoptimized Logical Plan:\n\n{}\n",
+        logical_plan.display_indent()
+    );
+
+    // now run the optimizer with our custom rule
+    let optimizer = Optimizer::with_rules(vec![Arc::new(MyRule {})]);
+    let mut optimizer_config = OptimizerConfig::default().with_skip_failing_rules(false);
+    let optimized_plan =
+        optimizer.optimize(&logical_plan, &mut optimizer_config, observe)?;
+    println!(
+        "Optimized Logical Plan:\n\n{}\n",
+        optimized_plan.display_indent()
+    );
+
+    Ok(())
+}
+
+fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) {
+    println!(
+        "After applying rule '{}':\n{}\n",
+        rule.name(),
+        plan.display_indent()
+    )
+}
+
+struct MyRule {}
+
+impl OptimizerRule for MyRule {
+    fn name(&self) -> &str {
+        "my_rule"
+    }
+
+    fn optimize(
+        &self,
+        plan: &LogicalPlan,
+        _config: &mut OptimizerConfig,
+    ) -> Result<LogicalPlan> {
+        // recurse down and optimize children first
+        let plan = utils::optimize_children(self, plan, _config)?;
+
+        match plan {
+            LogicalPlan::Filter(filter) => {
+                let mut expr_rewriter = MyExprRewriter {};
+                let predicate = filter.predicate().clone();
+                let predicate = predicate.rewrite(&mut expr_rewriter)?;
+                Ok(LogicalPlan::Filter(Filter::try_new(
+                    predicate,
+                    filter.input().clone(),
+                )?))
+            }
+            _ => Ok(plan.clone()),
+        }
+    }
+}
+
+struct MyExprRewriter {}
+
+impl ExprRewriter for MyExprRewriter {
+    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        match expr {
+            Expr::Between {
+                negated,
+                expr,
+                low,
+                high,
+            } => {
+                let expr: Expr = expr.as_ref().clone();
+                let low: Expr = low.as_ref().clone();
+                let high: Expr = high.as_ref().clone();
+                if negated {
+                    Ok(expr.clone().lt(low).or(expr.gt(high)))
+                } else {
+                    Ok(expr.clone().gt_eq(low).and(expr.lt_eq(high)))
+                }
+            }
+            _ => Ok(expr.clone()),
+        }
+    }
+}
+
+struct MyContextProvider {}
+
+impl ContextProvider for MyContextProvider {
+    fn get_table_provider(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
+        if name.table() == "person" {
+            Ok(Arc::new(MyTableSource {
+                schema: Arc::new(Schema::new(vec![
+                    Field::new("name", DataType::Utf8, false),
+                    Field::new("age", DataType::UInt8, false),
+                ])),
+            }))
+        } else {
+            Err(DataFusionError::Plan("table not found".to_string()))
+        }
+    }
+
+    fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
+        None
+    }
+
+    fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
+        None
+    }
+
+    fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
+        None
+    }
+}
+
+struct MyTableSource {
+    schema: SchemaRef,
+}
+
+impl TableSource for MyTableSource {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+}
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index c21dd5c88..6f683dc7f 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -367,10 +367,17 @@ impl PartialOrd for Expr {
 impl Expr {
     /// Returns the name of this expression as it should appear in a schema. This name
     /// will not include any CAST expressions.
-    pub fn name(&self) -> Result<String> {
+    pub fn display_name(&self) -> Result<String> {
         create_name(self)
     }
 
+    /// Returns the name of this expression as it should appear in a schema. This name
+    /// will not include any CAST expressions.
+    #[deprecated(since = "14.0.0", note = "please use `display_name` instead")]
+    pub fn name(&self) -> Result<String> {
+        self.display_name()
+    }
+
     /// Returns a full and complete string representation of this expression.
     pub fn canonical_name(&self) -> String {
         format!("{}", self)
@@ -1186,7 +1193,7 @@ mod test {
         assert_eq!(expected, expr.canonical_name());
         assert_eq!(expected, format!("{}", expr));
         assert_eq!(expected, format!("{:?}", expr));
-        assert_eq!(expected, expr.name()?);
+        assert_eq!(expected, expr.display_name()?);
         Ok(())
     }
 
@@ -1202,7 +1209,7 @@ mod test {
         assert_eq!(expected_canonical, format!("{:?}", expr));
         // note that CAST intentionally has a name that is different from its `Display`
         // representation. CAST does not change the name of expressions.
-        assert_eq!("Float32(1.23)", expr.name()?);
+        assert_eq!("Float32(1.23)", expr.display_name()?);
         Ok(())
     }
 
diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs
index 5442a2421..cfcbcc940 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -240,7 +240,7 @@ impl ExprSchemable for Expr {
             )),
             _ => Ok(DFField::new(
                 None,
-                &self.name()?,
+                &self.display_name()?,
                 self.get_type(input_schema)?,
                 self.nullable(input_schema)?,
             )),
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 300c3b8cb..631a64da6 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -841,7 +841,7 @@ pub(crate) fn validate_unique_names<'a>(
 ) -> Result<()> {
     let mut unique_names = HashMap::new();
     expressions.into_iter().enumerate().try_for_each(|(position, expr)| {
-        let name = expr.name()?;
+        let name = expr.display_name()?;
         match unique_names.get(&name) {
             None => {
                 unique_names.insert(name, (position, expr));
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 8e2544793..be4c45f8c 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -676,7 +676,7 @@ pub fn columnize_expr(e: Expr, input_schema: &DFSchema) -> Expr {
             Expr::Alias(Box::new(columnize_expr(*inner_expr, input_schema)), name)
         }
         Expr::ScalarSubquery(_) => e.clone(),
-        _ => match e.name() {
+        _ => match e.display_name() {
             Ok(name) => match input_schema.field_with_unqualified_name(&name) {
                 Ok(field) => Expr::Column(field.qualified_column()),
                 // expression not provided as input, do not convert to a column reference
@@ -728,7 +728,7 @@ pub fn expr_as_column_expr(expr: &Expr, plan: &LogicalPlan) -> Result<Expr> {
             let field = plan.schema().field_from_column(col)?;
             Ok(Expr::Column(field.qualified_column()))
         }
-        _ => Ok(Expr::Column(Column::from_name(expr.name()?))),
+        _ => Ok(Expr::Column(Column::from_name(expr.display_name()?))),
     }
 }
 
diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md
index 39d28a8fa..b8edeea87 100644
--- a/datafusion/optimizer/README.md
+++ b/datafusion/optimizer/README.md
@@ -17,10 +17,318 @@
   under the License.
 -->
 
-# DataFusion Query Optimizer Rules
+# DataFusion Query Optimizer
 
-[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
+[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory
+format.
 
-This crate is a submodule of DataFusion that provides query optimizer rules.
+DataFusion has a modular design, allowing individual crates to be re-used in other projects.
+
+This crate is a submodule of DataFusion that provides a query optimizer for logical plans, and
+contains an extensive set of OptimizerRules that may rewrite the plan and/or its expressions so
+they execute more quickly while still computing the same result.
+
+## Running the Optimizer
+
+The following code demonstrates the basic flow of creating the optimizer with a default set of optimization rules
+and applying it to a logical plan to produce an optimized logical plan.
+
+```rust
+
+// We need a logical plan as the starting point. There are many ways to build a logical plan:
+//
+// The `datafusion-expr` crate provides a LogicalPlanBuilder
+// The `datafusion-sql` crate provides a SQL query planner that can create a LogicalPlan from SQL
+// The `datafusion` crate provides a DataFrame API that can create a LogicalPlan
+let logical_plan = ...
+
+let mut config = OptimizerConfig::default();
+let optimizer = Optimizer::new(&config);
+let optimized_plan = optimizer.optimize(&logical_plan, &mut config, observe)?;
+
+fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) {
+    println!(
+        "After applying rule '{}':\n{}",
+        rule.name(),
+        plan.display_indent()
+    )
+}
+```
+
+## Providing Custom Rules
+
+The optimizer can be created with a custom set of rules.
+
+```rust
+let optimizer = Optimizer::with_rules(vec![
+    Arc::new(MyRule {})
+]);
+```
+
+## Writing Optimization Rules
+
+Please refer to the [rewrite_expr example](../../datafusion-examples/examples/rewrite_expr.rs) to learn more about
+the general approach to writing optimizer rules and then move on to studying the existing rules.
+
+All rules must implement the `OptimizerRule` trait.
+
+```rust
+/// `OptimizerRule` transforms one ['LogicalPlan'] into another which
+/// computes the same results, but in a potentially more efficient
+/// way. If there are no suitable transformations for the input plan,
+/// the optimizer can simply return it as is.
+pub trait OptimizerRule {
+    /// Rewrite `plan` to an optimized form
+    fn optimize(
+        &self,
+        plan: &LogicalPlan,
+        optimizer_config: &mut OptimizerConfig,
+    ) -> Result<LogicalPlan>;
+
+    /// A human readable name for this optimizer rule
+    fn name(&self) -> &str;
+}
+```
+
+### General Guidelines
+
+Rules typically walk the logical plan and the expression trees inside operators, selectively mutating
+individual operators or expressions.
+
+Sometimes there is an initial pass that visits the plan and builds state that is used in a second pass that performs
+the actual optimization. This approach is used in projection push down and filter push down.
+
+### Expression Naming
+
+Every expression in DataFusion has a name, which is used as the column name. For example, the output of the following
+query contains a single column with the name `"COUNT(aggregate_test_100.c9)"`:
+
+```text
+❯ select count(c9) from aggregate_test_100;
++------------------------------+
+| COUNT(aggregate_test_100.c9) |
++------------------------------+
+| 100                          |
++------------------------------+
+```
+
+These names are used to refer to the columns both in subqueries and internally from one stage of the LogicalPlan
+to another. For example:
+
+```text
+❯ select "COUNT(aggregate_test_100.c9)" + 1 from (select count(c9) from aggregate_test_100) as sq;
++--------------------------------------------+
+| sq.COUNT(aggregate_test_100.c9) + Int64(1) |
++--------------------------------------------+
+| 101                                        |
++--------------------------------------------+
+```
+
+### Implication
+
+Because DataFusion identifies columns using a string name, it is critical that the names of expressions are
+not changed by the optimizer when it rewrites expressions. This is typically accomplished by renaming a rewritten
+expression by adding an alias.
+
+Here is a simple example of such a rewrite. The expression `1 + 2` can be internally simplified to 3 but must still be
+displayed the same as `1 + 2`:
+
+```text
+❯ select 1 + 2;
++---------------------+
+| Int64(1) + Int64(2) |
++---------------------+
+| 3                   |
++---------------------+
+```
+
+Looking at the `EXPLAIN` output we can see that the optimizer has effectively rewritten `1 + 2` into
+`3 as "1 + 2"`:
+
+```text
+❯ explain select 1 + 2;
++---------------+-------------------------------------------------+
+| plan_type     | plan                                            |
++---------------+-------------------------------------------------+
+| logical_plan  | Projection: Int64(3) AS Int64(1) + Int64(2)     |
+|               |   EmptyRelation                                 |
+| physical_plan | ProjectionExec: expr=[3 as Int64(1) + Int64(2)] |
+|               |   EmptyExec: produce_one_row=true               |
+|               |                                                 |
++---------------+-------------------------------------------------+
+```
+
+If the expression name is not preserved, bugs such as [#3704](https://github.com/apache/arrow-datafusion/issues/3704)
+and [#3555](https://github.com/apache/arrow-datafusion/issues/3555) occur where the expected columns cannot be found.
+
+### Building Expression Names
+
+There are currently two ways to create a name for an expression in the logical plan.
+
+```rust
+impl Expr {
+    /// Returns the name of this expression as it should appear in a schema. This name
+    /// will not include any CAST expressions.
+    pub fn display_name(&self) -> Result<String> {
+        create_name(self)
+    }
+
+    /// Returns a full and complete string representation of this expression.
+    pub fn canonical_name(&self) -> String {
+        format!("{}", self)
+    }
+}
+```
+
+When comparing expressions to determine if they are equivalent, `canonical_name` should be used, and when creating a
+name to be used in a schema, `display_name` should be used.
+
+### Utilities
+
+There are a number of utility methods provided that take care of some common tasks.
+
+### ExpressionVisitor
+
+The `ExpressionVisitor` and `ExprVisitable` traits provide a mechanism for applying a visitor pattern to an expression tree.
+
+Here is an example that demonstrates this.
+
+```rust
+fn extract_subquery_filters(expression: &Expr, extracted: &mut Vec<Expr>) -> Result<()> {
+    struct InSubqueryVisitor<'a> {
+        accum: &'a mut Vec<Expr>,
+    }
+
+    impl ExpressionVisitor for InSubqueryVisitor<'_> {
+        fn pre_visit(self, expr: &Expr) -> Result<Recursion<Self>> {
+            if let Expr::InSubquery { .. } = expr {
+                self.accum.push(expr.to_owned());
+            }
+            Ok(Recursion::Continue(self))
+        }
+    }
+
+    expression.accept(InSubqueryVisitor { accum: extracted })?;
+    Ok(())
+}
+```
+
+### Rewriting Expressions
+
+The `ExprRewriter` trait can be implemented to provide a way to rewrite expressions. The rewriter can then be applied
+to an expression by calling `Expr::rewrite` (from the `ExprRewritable` trait).
+
+The `rewrite` method will perform a depth first walk of the expression and its children to rewrite an expression,
+consuming `self` and producing a new expression.
+
+```rust
+let mut expr_rewriter = MyExprRewriter {};
+let expr = expr.rewrite(&mut expr_rewriter)?;
+```
+
+Here is an example implementation which will rewrite `expr BETWEEN a AND b` as `expr >= a AND expr <= b`. Note that the
+implementation does not need to perform any recursion since this is handled by the `rewrite` method.
+
+```rust
+struct MyExprRewriter {}
+
+impl ExprRewriter for MyExprRewriter {
+    fn mutate(&mut self, expr: Expr) -> Result<Expr> {
+        match expr {
+            Expr::Between {
+                negated,
+                expr,
+                low,
+                high,
+            } => {
+                let expr: Expr = expr.as_ref().clone();
+                let low: Expr = low.as_ref().clone();
+                let high: Expr = high.as_ref().clone();
+                if negated {
+                    Ok(expr.clone().lt(low).or(expr.clone().gt(high)))
+                } else {
+                    Ok(expr.clone().gt_eq(low).and(expr.clone().lt_eq(high)))
+                }
+            }
+            _ => Ok(expr.clone()),
+        }
+    }
+}
+```
+
+### optimize_children
+
+Typically a rule is applied recursively to all operators within a query plan. Rather than duplicate
+that logic in each rule, an `optimize_children` method is provided. This recursively invokes the `optimize` method on
+the plan's children and then returns a node of the same type.
+
+```rust
+fn optimize(
+    &self,
+    plan: &LogicalPlan,
+    _config: &mut OptimizerConfig,
+) -> Result<LogicalPlan> {
+    // recurse down and optimize children first
+    let plan = utils::optimize_children(self, plan, _config)?;
+
+    ...
+}
+```
+
+### Writing Tests
+
+There should be unit tests in the same file as the new rule that test the effect of the rule being applied to a plan
+in isolation (without any other rule being applied).
+
+There should also be a test in `integration-tests.rs` that tests the rule as part of the overall optimization process.
+
+### Debugging
+
+The `EXPLAIN VERBOSE` command can be used to show the effect of each optimization rule on a query.
+
+In the following example, the `type_coercion` and `simplify_expressions` passes have simplified the plan so that it returns the constant `"3.2"` rather than doing a computation at execution time.
+
+```text
+❯ explain verbose select cast(1 + 2.2 as string) as foo;
++------------------------------------------------------------+---------------------------------------------------------------------------+
+| plan_type                                                  | plan                                                                      |
++------------------------------------------------------------+---------------------------------------------------------------------------+
+| initial_logical_plan                                       | Projection: CAST(Int64(1) + Float64(2.2) AS Utf8) AS foo                  |
+|                                                            |   EmptyRelation                                                           |
+| logical_plan after type_coercion                           | Projection: CAST(CAST(Int64(1) AS Float64) + Float64(2.2) AS Utf8) AS foo |
+|                                                            |   EmptyRelation                                                           |
+| logical_plan after simplify_expressions                    | Projection: Utf8("3.2") AS foo                                            |
+|                                                            |   EmptyRelation                                                           |
+| logical_plan after unwrap_cast_in_comparison               | SAME TEXT AS ABOVE                                                        |
+| logical_plan after decorrelate_where_exists                | SAME TEXT AS ABOVE                                                        |
+| logical_plan after decorrelate_where_in                    | SAME TEXT AS ABOVE                                                        |
+| logical_plan after scalar_subquery_to_join                 | SAME TEXT AS ABOVE                                                        |
+| logical_plan after subquery_filter_to_join                 | SAME TEXT AS ABOVE                                                        |
+| logical_plan after simplify_expressions                    | SAME TEXT AS ABOVE                                                        |
+| logical_plan after eliminate_filter                        | SAME TEXT AS ABOVE                                                        |
+| logical_plan after reduce_cross_join                       | SAME TEXT AS ABOVE                                                        |
+| logical_plan after common_sub_expression_eliminate         | SAME TEXT AS ABOVE                                                        |
+| logical_plan after eliminate_limit                         | SAME TEXT AS ABOVE                                                        |
+| logical_plan after projection_push_down                    | SAME TEXT AS ABOVE                                                        |
+| logical_plan after rewrite_disjunctive_predicate           | SAME TEXT AS ABOVE                                                        |
+| logical_plan after reduce_outer_join                       | SAME TEXT AS ABOVE                                                        |
+| logical_plan after filter_push_down                        | SAME TEXT AS ABOVE                                                        |
+| logical_plan after limit_push_down                         | SAME TEXT AS ABOVE                                                        |
+| logical_plan after single_distinct_aggregation_to_group_by | SAME TEXT AS ABOVE                                                        |
+| logical_plan                                               | Projection: Utf8("3.2") AS foo                                            |
+|                                                            |   EmptyRelation                                                           |
+| initial_physical_plan                                      | ProjectionExec: expr=[3.2 as foo]                                         |
+|                                                            |   EmptyExec: produce_one_row=true                                         |
+|                                                            |                                                                           |
+| physical_plan after aggregate_statistics                   | SAME TEXT AS ABOVE                                                        |
+| physical_plan after hash_build_probe_order                 | SAME TEXT AS ABOVE                                                        |
+| physical_plan after coalesce_batches                       | SAME TEXT AS ABOVE                                                        |
+| physical_plan after repartition                            | SAME TEXT AS ABOVE                                                        |
+| physical_plan after add_merge_exec                         | SAME TEXT AS ABOVE                                                        |
+| physical_plan                                              | ProjectionExec: expr=[3.2 as foo]                                         |
+|                                                            |   EmptyExec: produce_one_row=true                                         |
+|                                                            |                                                                           |
++------------------------------------------------------------+---------------------------------------------------------------------------+
+```
 
 [df]: https://crates.io/crates/datafusion
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index d33fe07c0..c0edba96f 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -546,7 +546,7 @@ impl ExprRewriter for CommonSubexprRewriter<'_> {
             self.curr_index += 1;
         }
 
-        let expr_name = expr.name()?;
+        let expr_name = expr.display_name()?;
         // Alias this `Column` expr to it original "expr name",
         // `projection_push_down` optimizer use "expr name" to eliminate useless
         // projections.
diff --git a/datafusion/optimizer/src/filter_push_down.rs b/datafusion/optimizer/src/filter_push_down.rs
index d1f696621..22b6aa5a1 100644
--- a/datafusion/optimizer/src/filter_push_down.rs
+++ b/datafusion/optimizer/src/filter_push_down.rs
@@ -406,7 +406,7 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> {
 
             let agg_columns = aggr_expr
                 .iter()
-                .map(|x| Ok(Column::from_name(x.name()?)))
+                .map(|x| Ok(Column::from_name(x.display_name()?)))
                 .collect::<Result<HashSet<_>>>()?;
             used_columns.extend(agg_columns);
 
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index 87e4d1ffc..5e61ccc5a 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -43,7 +43,8 @@ use std::sync::Arc;
 
 /// `OptimizerRule` transforms one ['LogicalPlan'] into another which
 /// computes the same results, but in a potentially more efficient
-/// way.
+/// way. If there are no suitable transformations for the input plan,
+/// the optimizer can simply return it as is.
 pub trait OptimizerRule {
     /// Rewrite `plan` to an optimized form
     fn optimize(
diff --git a/datafusion/optimizer/src/projection_push_down.rs b/datafusion/optimizer/src/projection_push_down.rs
index 5a048aac7..9e61ccd6e 100644
--- a/datafusion/optimizer/src/projection_push_down.rs
+++ b/datafusion/optimizer/src/projection_push_down.rs
@@ -259,7 +259,7 @@ fn optimize_plan(
             let mut new_window_expr = Vec::new();
             {
                 window_expr.iter().try_for_each(|expr| {
-                    let name = &expr.name()?;
+                    let name = &expr.display_name()?;
                     let column = Column::from_name(name);
                     if required_columns.contains(&column) {
                         new_window_expr.push(expr.clone());
@@ -317,7 +317,7 @@ fn optimize_plan(
             // Gather all columns needed for expressions in this Aggregate
             let mut new_aggr_expr = Vec::new();
             aggr_expr.iter().try_for_each(|expr| {
-                let name = &expr.name()?;
+                let name = &expr.display_name()?;
                 let column = Column::from_name(name);
                 if required_columns.contains(&column) {
                     new_aggr_expr.push(expr.clone());
diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs
index 7c203bdfe..ebb1be089 100644
--- a/datafusion/optimizer/src/simplify_expressions.rs
+++ b/datafusion/optimizer/src/simplify_expressions.rs
@@ -310,12 +310,12 @@ impl SimplifyExpressions {
             .map(|e| {
                 // We need to keep original expression name, if any.
                 // Constant folding should not change expression name.
-                let name = &e.name();
+                let name = &e.display_name();
 
                 // Apply the actual simplification logic
                 let new_e = simplifier.simplify(e)?;
 
-                let new_name = &new_e.name();
+                let new_name = &new_e.display_name();
 
                 if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) {
                     if expr_name != new_expr_name {
diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs
index 6d3343763..e6ec0e72f 100644
--- a/datafusion/optimizer/src/single_distinct_to_groupby.rs
+++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs
@@ -91,7 +91,7 @@ fn optimize(plan: &LogicalPlan) -> Result<LogicalPlan> {
                             fun, args, filter, ..
                         } => {
                             // is_single_distinct_agg ensure args.len=1
-                            if group_fields_set.insert(args[0].name()?) {
+                            if group_fields_set.insert(args[0].display_name()?) {
                                 inner_group_exprs
                                     .push(args[0].clone().alias(SINGLE_DISTINCT_ALIAS));
                             }
@@ -189,7 +189,7 @@ fn is_single_distinct_agg(plan: &LogicalPlan) -> Result<bool> {
                         distinct_count += 1;
                     }
                     for e in args {
-                        fields_set.insert(e.name()?);
+                        fields_set.insert(e.display_name()?);
                     }
                 }
             }
diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs
index 23aec3354..bf1968587 100644
--- a/datafusion/optimizer/src/utils.rs
+++ b/datafusion/optimizer/src/utils.rs
@@ -389,7 +389,7 @@ where
 fn name_for_alias(expr: &Expr) -> Result<String> {
     match expr {
         Expr::Sort { expr, .. } => name_for_alias(expr),
-        expr => expr.name(),
+        expr => expr.display_name(),
     }
 }
 
@@ -565,14 +565,14 @@ mod tests {
         let expr = rewrite_preserving_name(expr_from.clone(), &mut rewriter).unwrap();
 
         let original_name = match &expr_from {
-            Expr::Sort { expr, .. } => expr.name(),
-            expr => expr.name(),
+            Expr::Sort { expr, .. } => expr.display_name(),
+            expr => expr.display_name(),
         }
         .unwrap();
 
         let new_name = match &expr {
-            Expr::Sort { expr, .. } => expr.name(),
-            expr => expr.name(),
+            Expr::Sort { expr, .. } => expr.display_name(),
+            expr => expr.display_name(),
         }
         .unwrap();