You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/06/19 13:56:43 UTC
[arrow-datafusion] branch main updated: refactor: alias() should skip add alias for `Expr::Sort` (#6707)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new feb4b8fbe1 refactor: alias() should skip add alias for `Expr::Sort` (#6707)
feb4b8fbe1 is described below
commit feb4b8fbe19311aa8f1153662f3f75d5235e8fcb
Author: jakevin <ja...@gmail.com>
AuthorDate: Mon Jun 19 21:56:38 2023 +0800
refactor: alias() should skip add alias for `Expr::Sort` (#6707)
* fix: alias() should skip add alias for `Expr::Sort`
* refactor
---
datafusion/expr/src/expr.rs | 31 +++++++++++++++++-
datafusion/expr/src/expr_rewriter/mod.rs | 38 ++--------------------
datafusion/optimizer/src/merge_projection.rs | 6 +---
datafusion/optimizer/src/push_down_projection.rs | 6 +---
.../optimizer/src/scalar_subquery_to_join.rs | 3 +-
.../src/simplify_expressions/simplify_exprs.rs | 20 ++----------
6 files changed, 39 insertions(+), 65 deletions(-)
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 6ac2404f9d..a0926b7d31 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -759,9 +759,38 @@ impl Expr {
Expr::ILike(Like::new(true, Box::new(self), Box::new(other), None))
}
+ /// Return the name to use for the specific Expr, recursing into
+ /// `Expr::Sort` as appropriate
+ pub fn name_for_alias(&self) -> Result<String> {
+ match self {
+ // call Expr::display_name() on a Expr::Sort will throw an error
+ Expr::Sort(Sort { expr, .. }) => expr.name_for_alias(),
+ expr => expr.display_name(),
+ }
+ }
+
+ /// Ensure `expr` has the name as `original_name` by adding an
+ /// alias if necessary.
+ pub fn alias_if_changed(self, original_name: String) -> Result<Expr> {
+ let new_name = self.name_for_alias()?;
+
+ if new_name == original_name {
+ return Ok(self);
+ }
+
+ Ok(self.alias(original_name))
+ }
+
/// Return `self AS name` alias expression
pub fn alias(self, name: impl Into<String>) -> Expr {
- Expr::Alias(Box::new(self), name.into())
+ match self {
+ Expr::Sort(Sort {
+ expr,
+ asc,
+ nulls_first,
+ }) => Expr::Sort(Sort::new(Box::new(expr.alias(name)), asc, nulls_first)),
+ _ => Expr::Alias(Box::new(self), name.into()),
+ }
}
/// Remove an alias from an expression if one exists.
diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs
index 1e0d216cad..bc08bb4d66 100644
--- a/datafusion/expr/src/expr_rewriter/mod.rs
+++ b/datafusion/expr/src/expr_rewriter/mod.rs
@@ -17,7 +17,6 @@
//! Expression rewriter
-use crate::expr::Sort;
use crate::logical_plan::Projection;
use crate::{Expr, ExprSchemable, LogicalPlan, LogicalPlanBuilder};
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
@@ -252,46 +251,15 @@ pub fn rewrite_preserving_name<R>(expr: Expr, rewriter: &mut R) -> Result<Expr>
where
R: TreeNodeRewriter<N = Expr>,
{
- let original_name = name_for_alias(&expr)?;
+ let original_name = expr.name_for_alias()?;
let expr = expr.rewrite(rewriter)?;
- add_alias_if_changed(original_name, expr)
-}
-
-/// Return the name to use for the specific Expr, recursing into
-/// `Expr::Sort` as appropriate
-fn name_for_alias(expr: &Expr) -> Result<String> {
- match expr {
- // call Expr::display_name() on a Expr::Sort will throw an error
- Expr::Sort(Sort { expr, .. }) => name_for_alias(expr),
- expr => expr.display_name(),
- }
-}
-
-/// Ensure `expr` has the name as `original_name` by adding an
-/// alias if necessary.
-fn add_alias_if_changed(original_name: String, expr: Expr) -> Result<Expr> {
- let new_name = name_for_alias(&expr)?;
-
- if new_name == original_name {
- return Ok(expr);
- }
-
- Ok(match expr {
- Expr::Sort(Sort {
- expr,
- asc,
- nulls_first,
- }) => {
- let expr = add_alias_if_changed(original_name, *expr)?;
- Expr::Sort(Sort::new(Box::new(expr), asc, nulls_first))
- }
- expr => expr.alias(original_name),
- })
+ expr.alias_if_changed(original_name)
}
#[cfg(test)]
mod test {
use super::*;
+ use crate::expr::Sort;
use crate::{col, lit, Cast};
use arrow::datatypes::DataType;
use datafusion_common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter};
diff --git a/datafusion/optimizer/src/merge_projection.rs b/datafusion/optimizer/src/merge_projection.rs
index d551283015..1426aed146 100644
--- a/datafusion/optimizer/src/merge_projection.rs
+++ b/datafusion/optimizer/src/merge_projection.rs
@@ -55,11 +55,7 @@ impl OptimizerRule for MergeProjection {
let parent_expr = parent_projection.schema.fields()
[i]
.qualified_name();
- if e.display_name()? == parent_expr {
- Ok(e)
- } else {
- Ok(e.alias(parent_expr))
- }
+ e.alias_if_changed(parent_expr)
}
Err(e) => Err(e),
})
diff --git a/datafusion/optimizer/src/push_down_projection.rs b/datafusion/optimizer/src/push_down_projection.rs
index c64dfc578b..16ddb9b41f 100644
--- a/datafusion/optimizer/src/push_down_projection.rs
+++ b/datafusion/optimizer/src/push_down_projection.rs
@@ -102,11 +102,7 @@ impl OptimizerRule for PushDownProjection {
Ok(e) => {
let parent_expr =
projection.schema.fields()[i].qualified_name();
- if e.display_name()? == parent_expr {
- Ok(e)
- } else {
- Ok(e.alias(parent_expr))
- }
+ e.alias_if_changed(parent_expr)
}
Err(e) => Err(e),
})
diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs
index fa06e1ddbf..e5f491b2c8 100644
--- a/datafusion/optimizer/src/scalar_subquery_to_join.rs
+++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs
@@ -170,8 +170,7 @@ impl OptimizerRule for ScalarSubqueryToJoin {
let new_expr = expr_to_rewrite_expr_map.get(expr).unwrap();
let new_expr_name = new_expr.display_name()?;
if new_expr_name != old_expr_name {
- proj_exprs
- .push(Expr::Alias(Box::new(new_expr.clone()), old_expr_name))
+ proj_exprs.push(new_expr.clone().alias(old_expr_name))
} else {
proj_exprs.push(new_expr.clone());
}
diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
index 239497d9fa..c65768bb8b 100644
--- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
+++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
@@ -86,24 +86,10 @@ impl SimplifyExpressions {
.expressions()
.into_iter()
.map(|e| {
- // We need to keep original expression name, if any.
- // Constant folding should not change expression name.
- let name = &e.display_name();
-
- // Apply the actual simplification logic
+ // TODO: unify with `rewrite_preserving_name`
+ let original_name = e.name_for_alias()?;
let new_e = simplifier.simplify(e)?;
-
- let new_name = &new_e.display_name();
-
- if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) {
- if expr_name != new_expr_name {
- Ok(new_e.alias(expr_name))
- } else {
- Ok(new_e)
- }
- } else {
- Ok(new_e)
- }
+ new_e.alias_if_changed(original_name)
})
.collect::<Result<Vec<_>>>()?;