You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/07/15 11:20:06 UTC
[arrow-datafusion] branch master updated: Fix invalid projection in `CommonSubexprEliminate` (#2915)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new b5537e753 Fix invalid projection in `CommonSubexprEliminate` (#2915)
b5537e753 is described below
commit b5537e753078f1a97315d641ea3608b6635c1069
Author: Andy Grove <ag...@apache.org>
AuthorDate: Fri Jul 15 05:20:01 2022 -0600
Fix invalid projection in `CommonSubexprEliminate` (#2915)
* unit test to reproduce the bug
* fix the bug
* remove redundant merge of input schema
---
.../optimizer/src/common_subexpr_eliminate.rs | 33 +++++++++++++++++++---
1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index 3964bee6b..8627b404d 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -282,15 +282,13 @@ fn build_project_plan(
}
for field in input.schema().fields() {
- if !fields_set.contains(field.name()) {
- fields_set.insert(field.name().to_owned());
+ if fields_set.insert(field.qualified_name()) {
fields.push(field.clone());
project_exprs.push(Expr::Column(field.qualified_column()));
}
}
- let mut schema = DFSchema::new_with_metadata(fields, HashMap::new())?;
- schema.merge(input.schema());
+ let schema = DFSchema::new_with_metadata(fields, HashMap::new())?;
Ok(LogicalPlan::Projection(Projection::try_new_with_schema(
project_exprs,
@@ -699,6 +697,7 @@ fn replace_common_expr(
mod test {
use super::*;
use crate::test::*;
+ use datafusion_expr::logical_plan::JoinType;
use datafusion_expr::{
avg, binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, sum,
Operator,
@@ -890,4 +889,30 @@ mod test {
assert!(field_set.insert(field.qualified_name()));
}
}
+
+ #[test]
+ fn redundant_project_fields_join_input() {
+ let table_scan_1 = test_table_scan_with_name("test1").unwrap();
+ let table_scan_2 = test_table_scan_with_name("test2").unwrap();
+ let join = LogicalPlanBuilder::from(table_scan_1)
+ .join(&table_scan_2, JoinType::Inner, (vec!["a"], vec!["a"]), None)
+ .unwrap()
+ .build()
+ .unwrap();
+ let affected_id: HashSet<Identifier> =
+ ["c+a".to_string(), "d+a".to_string()].into_iter().collect();
+ let expr_set = [
+ ("c+a".to_string(), (col("c+a"), 1, DataType::UInt32)),
+ ("d+a".to_string(), (col("d+a"), 1, DataType::UInt32)),
+ ]
+ .into_iter()
+ .collect();
+ let project = build_project_plan(join, affected_id.clone(), &expr_set).unwrap();
+ let project_2 = build_project_plan(project, affected_id, &expr_set).unwrap();
+
+ let mut field_set = HashSet::new();
+ for field in project_2.schema().fields() {
+ assert!(field_set.insert(field.qualified_name()));
+ }
+ }
}