You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2024/02/02 08:03:04 UTC
(arrow-datafusion) branch main updated: Fix update_expr for projection pushdown (#9096)
This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new a6ef1bec48 Fix update_expr for projection pushdown (#9096)
a6ef1bec48 is described below
commit a6ef1bec480872f15f83628a7fb8c9bb2722cd49
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Fri Feb 2 00:02:59 2024 -0800
Fix update_expr for projection pushdown (#9096)
* Fix update_expr for projection pushdown
* Drop table
* Update existing test
* Update existing test
* Update test
* Fix
---
.../src/physical_optimizer/projection_pushdown.rs | 16 +++++----
datafusion/sqllogictest/test_files/join.slt | 41 +++++++++++++++++-----
2 files changed, 41 insertions(+), 16 deletions(-)
diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs
index 3045d5c3c5..e638f4a9a8 100644
--- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs
+++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs
@@ -477,7 +477,7 @@ fn try_swapping_with_sort_preserving_merge(
projection: &ProjectionExec,
spm: &SortPreservingMergeExec,
) -> Result<Option<Arc<dyn ExecutionPlan>>> {
- // If the projection does not narrow the the schema, we should not try to push it down.
+ // If the projection does not narrow the schema, we should not try to push it down.
if projection.expr().len() >= projection.input().schema().fields().len() {
return Ok(None);
}
@@ -916,7 +916,9 @@ fn update_expr(
.find_map(|(index, (projected_expr, alias))| {
projected_expr.as_any().downcast_ref::<Column>().and_then(
|projected_column| {
- column.name().eq(projected_column.name()).then(|| {
+ (column.name().eq(projected_column.name())
+ && column.index() == projected_column.index())
+ .then(|| {
state = RewriteState::RewrittenValid;
Arc::new(Column::new(alias, index)) as _
})
@@ -1437,12 +1439,12 @@ mod tests {
)?),
];
let projected_exprs: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![
- (Arc::new(Column::new("a", 0)), "a".to_owned()),
+ (Arc::new(Column::new("a", 3)), "a".to_owned()),
(Arc::new(Column::new("b", 1)), "b_new".to_owned()),
- (Arc::new(Column::new("c", 2)), "c".to_owned()),
- (Arc::new(Column::new("d", 3)), "d_new".to_owned()),
- (Arc::new(Column::new("e", 4)), "e".to_owned()),
- (Arc::new(Column::new("f", 5)), "f_new".to_owned()),
+ (Arc::new(Column::new("c", 0)), "c".to_owned()),
+ (Arc::new(Column::new("d", 2)), "d_new".to_owned()),
+ (Arc::new(Column::new("e", 5)), "e".to_owned()),
+ (Arc::new(Column::new("f", 4)), "f_new".to_owned()),
];
let expected_exprs: Vec<Arc<dyn PhysicalExpr>> = vec![
diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt
index ca9b918ff3..e5cbf31c83 100644
--- a/datafusion/sqllogictest/test_files/join.slt
+++ b/datafusion/sqllogictest/test_files/join.slt
@@ -602,22 +602,23 @@ CREATE TABLE t1(a text, b int) AS VALUES ('Alice', 50), ('Alice', 100);
statement ok
CREATE TABLE t2(a text, b int) AS VALUES ('Alice', 2), ('Alice', 1);
-# the current query results are incorrect, becuase the query was incorrectly rewritten as:
-# SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t1.b;
-# the difference is ORDER BY clause rewrite from t2.b to t1.b, it is incorrect.
-# after https://github.com/apache/arrow-datafusion/issues/8374 fixed, the correct result should be:
-# Alice 50
-# Alice 100
-# Alice 50
-# Alice 100
+# test 'ORDER BY' joined result with same column name
query TI
-SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t1.b;
----
Alice 50
Alice 50
Alice 100
Alice 100
+query TI
+SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t2.b;
+----
+Alice 50
+Alice 100
+Alice 50
+Alice 100
+
query TITI
SELECT t1.a, t1.b, t2.a, t2.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t2.b;
----
@@ -663,3 +664,25 @@ DROP TABLE t1;
statement ok
DROP TABLE t2;
+
+# sort by company name and then by lead name
+statement ok
+CREATE TABLE companies(name VARCHAR, employees INT) AS VALUES ('Jeyork', 150),('Shalk', 350),('ShuttlP', 75)
+
+statement ok
+CREATE TABLE leads(name VARCHAR, company VARCHAR) AS VALUES ('Alex F', 'Jeyork'),('John B', 'Shalk'),('Samanta J', 'ShuttlP'),('Trevor R', 'Jeyork'),('Alice B', 'ShuttlP')
+
+query TT
+SELECT l.* FROM leads l LEFT JOIN companies c ON c."name" = l."company" ORDER BY c."name", l."name"
+----
+Alex F Jeyork
+Trevor R Jeyork
+John B Shalk
+Alice B ShuttlP
+Samanta J ShuttlP
+
+statement ok
+DROP TABLE companies
+
+statement ok
+DROP TABLE leads