You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/10/11 22:23:08 UTC

[arrow-datafusion] branch master updated: Remove some uneeded code in `CommonSubexprEliminate` (#3730)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 24ede6af6 Remove some uneeded code in `CommonSubexprEliminate` (#3730)
24ede6af6 is described below

commit 24ede6af6a209923a5bf4e1b1c24427a887e25e0
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Tue Oct 11 18:23:04 2022 -0400

    Remove some uneeded code in `CommonSubexprEliminate` (#3730)
---
 .../optimizer/src/common_subexpr_eliminate.rs      | 74 +++-------------------
 1 file changed, 9 insertions(+), 65 deletions(-)

diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index cea5e8c46..db3d2ca8d 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -95,9 +95,7 @@ fn optimize(
             alias,
         }) => {
             let input_schema = Arc::clone(input.schema());
-            let all_schemas: Vec<DFSchemaRef> =
-                plan.all_schemas().into_iter().cloned().collect();
-            let arrays = to_arrays(expr, input_schema, all_schemas, &mut expr_set)?;
+            let arrays = to_arrays(expr, input_schema, &mut expr_set)?;
 
             let (mut new_expr, new_input) = rewrite_expr(
                 &[expr],
@@ -116,17 +114,8 @@ fn optimize(
         }
         LogicalPlan::Filter(Filter { predicate, input }) => {
             let input_schema = Arc::clone(input.schema());
-            let all_schemas: Vec<DFSchemaRef> =
-                plan.all_schemas().into_iter().cloned().collect();
-
             let mut id_array = vec![];
-            expr_to_identifier(
-                predicate,
-                &mut expr_set,
-                &mut id_array,
-                input_schema,
-                all_schemas,
-            )?;
+            expr_to_identifier(predicate, &mut expr_set, &mut id_array, input_schema)?;
 
             let (mut new_expr, new_input) = rewrite_expr(
                 &[&[predicate.clone()]],
@@ -153,10 +142,7 @@ fn optimize(
             schema,
         }) => {
             let input_schema = Arc::clone(input.schema());
-            let all_schemas: Vec<DFSchemaRef> =
-                plan.all_schemas().into_iter().cloned().collect();
-            let arrays =
-                to_arrays(window_expr, input_schema, all_schemas, &mut expr_set)?;
+            let arrays = to_arrays(window_expr, input_schema, &mut expr_set)?;
 
             let (mut new_expr, new_input) = rewrite_expr(
                 &[window_expr],
@@ -179,16 +165,9 @@ fn optimize(
             schema,
         }) => {
             let input_schema = Arc::clone(input.schema());
-            let all_schemas: Vec<DFSchemaRef> =
-                plan.all_schemas().into_iter().cloned().collect();
-            let group_arrays = to_arrays(
-                group_expr,
-                Arc::clone(&input_schema),
-                all_schemas.clone(),
-                &mut expr_set,
-            )?;
-            let aggr_arrays =
-                to_arrays(aggr_expr, input_schema, all_schemas, &mut expr_set)?;
+            let group_arrays =
+                to_arrays(group_expr, Arc::clone(&input_schema), &mut expr_set)?;
+            let aggr_arrays = to_arrays(aggr_expr, input_schema, &mut expr_set)?;
 
             let (mut new_expr, new_input) = rewrite_expr(
                 &[group_expr, aggr_expr],
@@ -210,9 +189,7 @@ fn optimize(
         }
         LogicalPlan::Sort(Sort { expr, input, fetch }) => {
             let input_schema = Arc::clone(input.schema());
-            let all_schemas: Vec<DFSchemaRef> =
-                plan.all_schemas().into_iter().cloned().collect();
-            let arrays = to_arrays(expr, input_schema, all_schemas, &mut expr_set)?;
+            let arrays = to_arrays(expr, input_schema, &mut expr_set)?;
 
             let (mut new_expr, new_input) = rewrite_expr(
                 &[expr],
@@ -271,19 +248,12 @@ fn pop_expr(new_expr: &mut Vec<Vec<Expr>>) -> Result<Vec<Expr>> {
 fn to_arrays(
     expr: &[Expr],
     input_schema: DFSchemaRef,
-    all_schemas: Vec<DFSchemaRef>,
     expr_set: &mut ExprSet,
 ) -> Result<Vec<Vec<(usize, String)>>> {
     expr.iter()
         .map(|e| {
             let mut id_array = vec![];
-            expr_to_identifier(
-                e,
-                expr_set,
-                &mut id_array,
-                Arc::clone(&input_schema),
-                all_schemas.clone(),
-            )?;
+            expr_to_identifier(e, expr_set, &mut id_array, Arc::clone(&input_schema))?;
 
             Ok(id_array)
         })
@@ -394,13 +364,6 @@ struct ExprIdentifierVisitor<'a> {
     /// input schema for the node that we're optimizing, so we can determine the correct datatype
     /// for each subexpression
     input_schema: DFSchemaRef,
-    /// all schemas in the logical plan, as a fall back if we cannot resolve an expression type
-    /// from the input schema alone
-    // This fallback should never be necessary as the expression datatype should always be
-    // resolvable from the input schema of the node that's being optimized.
-    // todo: This can likely be removed if we are sure it's safe to do so.
-    all_schemas: Vec<DFSchemaRef>,
-
     // inner states
     visit_stack: Vec<VisitRecord>,
     /// increased in pre_visit, start from 0.
@@ -478,23 +441,7 @@ impl ExpressionVisitor for ExprIdentifierVisitor<'_> {
         self.id_array[idx] = (self.series_number, desc.clone());
         self.visit_stack.push(VisitRecord::ExprItem(desc.clone()));
 
-        let data_type = if let Ok(data_type) = expr.get_type(&self.input_schema) {
-            data_type
-        } else {
-            // Expression type could not be resolved in schema, fall back to all schemas.
-            //
-            // This fallback should never be necessary as the expression datatype should always be
-            // resolvable from the input schema of the node that's being optimized.
-            // todo: This else-branch can likely be removed if we are sure it's safe to do so.
-            let merged_schema =
-                self.all_schemas
-                    .iter()
-                    .fold(DFSchema::empty(), |mut lhs, rhs| {
-                        lhs.merge(rhs);
-                        lhs
-                    });
-            expr.get_type(&merged_schema)?
-        };
+        let data_type = expr.get_type(&self.input_schema)?;
 
         self.expr_set
             .entry(desc)
@@ -510,13 +457,11 @@ fn expr_to_identifier(
     expr_set: &mut ExprSet,
     id_array: &mut Vec<(usize, Identifier)>,
     input_schema: DFSchemaRef,
-    all_schemas: Vec<DFSchemaRef>,
 ) -> Result<()> {
     expr.accept(ExprIdentifierVisitor {
         expr_set,
         id_array,
         input_schema,
-        all_schemas,
         visit_stack: vec![],
         node_count: 0,
         series_number: 0,
@@ -669,7 +614,6 @@ mod test {
             &mut HashMap::new(),
             &mut id_array,
             Arc::clone(&schema),
-            vec![schema],
         )?;
 
         let expected = vec![