You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/19 13:56:43 UTC

[arrow-datafusion] branch main updated: refactor: alias() should skip add alias for `Expr::Sort` (#6707)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new feb4b8fbe1 refactor: alias() should skip add alias for `Expr::Sort` (#6707)
feb4b8fbe1 is described below

commit feb4b8fbe19311aa8f1153662f3f75d5235e8fcb
Author: jakevin <ja...@gmail.com>
AuthorDate: Mon Jun 19 21:56:38 2023 +0800

    refactor: alias() should skip add alias for `Expr::Sort` (#6707)
    
    * fix: alias() should skip add alias for `Expr::Sort`
    
    * refactor
---
 datafusion/expr/src/expr.rs                        | 31 +++++++++++++++++-
 datafusion/expr/src/expr_rewriter/mod.rs           | 38 ++--------------------
 datafusion/optimizer/src/merge_projection.rs       |  6 +---
 datafusion/optimizer/src/push_down_projection.rs   |  6 +---
 .../optimizer/src/scalar_subquery_to_join.rs       |  3 +-
 .../src/simplify_expressions/simplify_exprs.rs     | 20 ++----------
 6 files changed, 39 insertions(+), 65 deletions(-)

diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 6ac2404f9d..a0926b7d31 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -759,9 +759,38 @@ impl Expr {
         Expr::ILike(Like::new(true, Box::new(self), Box::new(other), None))
     }
 
+    /// Return the name to use for the specific Expr, recursing into
+    /// `Expr::Sort` as appropriate
+    pub fn name_for_alias(&self) -> Result<String> {
+        match self {
+            // call Expr::display_name() on a Expr::Sort will throw an error
+            Expr::Sort(Sort { expr, .. }) => expr.name_for_alias(),
+            expr => expr.display_name(),
+        }
+    }
+
+    /// Ensure `expr` has the name as `original_name` by adding an
+    /// alias if necessary.
+    pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
+        let new_name = self.name_for_alias()?;
+
+        if new_name == original_name {
+            return Ok(self);
+        }
+
+        Ok(self.alias(original_name))
+    }
+
     /// Return `self AS name` alias expression
     pub fn alias(self, name: impl Into<String>) -> Expr {
-        Expr::Alias(Box::new(self), name.into())
+        match self {
+            Expr::Sort(Sort {
+                expr,
+                asc,
+                nulls_first,
+            }) => Expr::Sort(Sort::new(Box::new(expr.alias(name)), asc, nulls_first)),
+            _ => Expr::Alias(Box::new(self), name.into()),
+        }
     }
 
     /// Remove an alias from an expression if one exists.
diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs
index 1e0d216cad..bc08bb4d66 100644
--- a/datafusion/expr/src/expr_rewriter/mod.rs
+++ b/datafusion/expr/src/expr_rewriter/mod.rs
@@ -17,7 +17,6 @@
 
 //! Expression rewriter
 
-use crate::expr::Sort;
 use crate::logical_plan::Projection;
 use crate::{Expr, ExprSchemable, LogicalPlan, LogicalPlanBuilder};
 use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
@@ -252,46 +251,15 @@ pub fn rewrite_preserving_name<R>(expr: Expr, rewriter: &mut R) -> Result<Expr>
 where
     R: TreeNodeRewriter<N = Expr>,
 {
-    let original_name = name_for_alias(&expr)?;
+    let original_name = expr.name_for_alias()?;
     let expr = expr.rewrite(rewriter)?;
-    add_alias_if_changed(original_name, expr)
-}
-
-/// Return the name to use for the specific Expr, recursing into
-/// `Expr::Sort` as appropriate
-fn name_for_alias(expr: &Expr) -> Result<String> {
-    match expr {
-        // call Expr::display_name() on a Expr::Sort will throw an error
-        Expr::Sort(Sort { expr, .. }) => name_for_alias(expr),
-        expr => expr.display_name(),
-    }
-}
-
-/// Ensure `expr` has the name as `original_name` by adding an
-/// alias if necessary.
-fn add_alias_if_changed(original_name: String, expr: Expr) -> Result<Expr> {
-    let new_name = name_for_alias(&expr)?;
-
-    if new_name == original_name {
-        return Ok(expr);
-    }
-
-    Ok(match expr {
-        Expr::Sort(Sort {
-            expr,
-            asc,
-            nulls_first,
-        }) => {
-            let expr = add_alias_if_changed(original_name, *expr)?;
-            Expr::Sort(Sort::new(Box::new(expr), asc, nulls_first))
-        }
-        expr => expr.alias(original_name),
-    })
+    expr.alias_if_changed(original_name)
 }
 
 #[cfg(test)]
 mod test {
     use super::*;
+    use crate::expr::Sort;
     use crate::{col, lit, Cast};
     use arrow::datatypes::DataType;
     use datafusion_common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter};
diff --git a/datafusion/optimizer/src/merge_projection.rs b/datafusion/optimizer/src/merge_projection.rs
index d551283015..1426aed146 100644
--- a/datafusion/optimizer/src/merge_projection.rs
+++ b/datafusion/optimizer/src/merge_projection.rs
@@ -55,11 +55,7 @@ impl OptimizerRule for MergeProjection {
                                     let parent_expr = parent_projection.schema.fields()
                                         [i]
                                         .qualified_name();
-                                    if e.display_name()? == parent_expr {
-                                        Ok(e)
-                                    } else {
-                                        Ok(e.alias(parent_expr))
-                                    }
+                                    e.alias_if_changed(parent_expr)
                                 }
                                 Err(e) => Err(e),
                             })
diff --git a/datafusion/optimizer/src/push_down_projection.rs b/datafusion/optimizer/src/push_down_projection.rs
index c64dfc578b..16ddb9b41f 100644
--- a/datafusion/optimizer/src/push_down_projection.rs
+++ b/datafusion/optimizer/src/push_down_projection.rs
@@ -102,11 +102,7 @@ impl OptimizerRule for PushDownProjection {
                         Ok(e) => {
                             let parent_expr =
                                 projection.schema.fields()[i].qualified_name();
-                            if e.display_name()? == parent_expr {
-                                Ok(e)
-                            } else {
-                                Ok(e.alias(parent_expr))
-                            }
+                            e.alias_if_changed(parent_expr)
                         }
                         Err(e) => Err(e),
                     })
diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs
index fa06e1ddbf..e5f491b2c8 100644
--- a/datafusion/optimizer/src/scalar_subquery_to_join.rs
+++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs
@@ -170,8 +170,7 @@ impl OptimizerRule for ScalarSubqueryToJoin {
                     let new_expr = expr_to_rewrite_expr_map.get(expr).unwrap();
                     let new_expr_name = new_expr.display_name()?;
                     if new_expr_name != old_expr_name {
-                        proj_exprs
-                            .push(Expr::Alias(Box::new(new_expr.clone()), old_expr_name))
+                        proj_exprs.push(new_expr.clone().alias(old_expr_name))
                     } else {
                         proj_exprs.push(new_expr.clone());
                     }
diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
index 239497d9fa..c65768bb8b 100644
--- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
+++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
@@ -86,24 +86,10 @@ impl SimplifyExpressions {
             .expressions()
             .into_iter()
             .map(|e| {
-                // We need to keep original expression name, if any.
-                // Constant folding should not change expression name.
-                let name = &e.display_name();
-
-                // Apply the actual simplification logic
+                // TODO: unify with `rewrite_preserving_name`
+                let original_name = e.name_for_alias()?;
                 let new_e = simplifier.simplify(e)?;
-
-                let new_name = &new_e.display_name();
-
-                if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) {
-                    if expr_name != new_expr_name {
-                        Ok(new_e.alias(expr_name))
-                    } else {
-                        Ok(new_e)
-                    }
-                } else {
-                    Ok(new_e)
-                }
+                new_e.alias_if_changed(original_name)
             })
             .collect::<Result<Vec<_>>>()?;