You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2024/02/02 08:03:04 UTC

(arrow-datafusion) branch main updated: Fix update_expr for projection pushdown (#9096)

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a6ef1bec48 Fix update_expr for projection pushdown (#9096)
a6ef1bec48 is described below

commit a6ef1bec480872f15f83628a7fb8c9bb2722cd49
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Fri Feb 2 00:02:59 2024 -0800

    Fix update_expr for projection pushdown (#9096)
    
    * Fix update_expr for projection pushdown
    
    * Drop table
    
    * Update existing test
    
    * Update existing test
    
    * Update test
    
    * Fix
---
 .../src/physical_optimizer/projection_pushdown.rs  | 16 +++++----
 datafusion/sqllogictest/test_files/join.slt        | 41 +++++++++++++++++-----
 2 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs
index 3045d5c3c5..e638f4a9a8 100644
--- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs
+++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs
@@ -477,7 +477,7 @@ fn try_swapping_with_sort_preserving_merge(
     projection: &ProjectionExec,
     spm: &SortPreservingMergeExec,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-    // If the projection does not narrow the the schema, we should not try to push it down.
+    // If the projection does not narrow the schema, we should not try to push it down.
     if projection.expr().len() >= projection.input().schema().fields().len() {
         return Ok(None);
     }
@@ -916,7 +916,9 @@ fn update_expr(
                     .find_map(|(index, (projected_expr, alias))| {
                         projected_expr.as_any().downcast_ref::<Column>().and_then(
                             |projected_column| {
-                                column.name().eq(projected_column.name()).then(|| {
+                                (column.name().eq(projected_column.name())
+                                    && column.index() == projected_column.index())
+                                .then(|| {
                                     state = RewriteState::RewrittenValid;
                                     Arc::new(Column::new(alias, index)) as _
                                 })
@@ -1437,12 +1439,12 @@ mod tests {
             )?),
         ];
         let projected_exprs: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![
-            (Arc::new(Column::new("a", 0)), "a".to_owned()),
+            (Arc::new(Column::new("a", 3)), "a".to_owned()),
             (Arc::new(Column::new("b", 1)), "b_new".to_owned()),
-            (Arc::new(Column::new("c", 2)), "c".to_owned()),
-            (Arc::new(Column::new("d", 3)), "d_new".to_owned()),
-            (Arc::new(Column::new("e", 4)), "e".to_owned()),
-            (Arc::new(Column::new("f", 5)), "f_new".to_owned()),
+            (Arc::new(Column::new("c", 0)), "c".to_owned()),
+            (Arc::new(Column::new("d", 2)), "d_new".to_owned()),
+            (Arc::new(Column::new("e", 5)), "e".to_owned()),
+            (Arc::new(Column::new("f", 4)), "f_new".to_owned()),
         ];
 
         let expected_exprs: Vec<Arc<dyn PhysicalExpr>> = vec![
diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt
index ca9b918ff3..e5cbf31c83 100644
--- a/datafusion/sqllogictest/test_files/join.slt
+++ b/datafusion/sqllogictest/test_files/join.slt
@@ -602,22 +602,23 @@ CREATE TABLE t1(a text, b int) AS VALUES ('Alice', 50), ('Alice', 100);
 statement ok
 CREATE TABLE t2(a text, b int) AS VALUES ('Alice', 2), ('Alice', 1);
 
-# the current query results are incorrect, becuase the query was incorrectly rewritten as:
-# SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t1.b;
-# the difference is ORDER BY clause rewrite from t2.b to t1.b, it is incorrect.
-# after https://github.com/apache/arrow-datafusion/issues/8374 fixed, the correct result should be:
-# Alice 50
-# Alice 100
-# Alice 50
-# Alice 100
+# test 'ORDER BY' joined result with same column name
 query TI
-SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t1.b;
 ----
 Alice 50
 Alice 50
 Alice 100
 Alice 100
 
+query TI
+SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t2.b;
+----
+Alice 50
+Alice 100
+Alice 50
+Alice 100
+
 query TITI
 SELECT t1.a, t1.b, t2.a, t2.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t2.b;
 ----
@@ -663,3 +664,25 @@ DROP TABLE t1;
 
 statement ok
 DROP TABLE t2;
+
+# sort by company name and then by lead name
+statement ok
+CREATE TABLE companies(name VARCHAR, employees INT) AS VALUES ('Jeyork', 150),('Shalk', 350),('ShuttlP', 75)
+
+statement ok
+CREATE TABLE leads(name VARCHAR, company VARCHAR) AS VALUES ('Alex F', 'Jeyork'),('John B', 'Shalk'),('Samanta J', 'ShuttlP'),('Trevor R', 'Jeyork'),('Alice B', 'ShuttlP')
+
+query TT
+SELECT l.* FROM leads l LEFT JOIN companies c ON c."name" = l."company" ORDER BY c."name", l."name"
+----
+Alex F Jeyork
+Trevor R Jeyork
+John B Shalk
+Alice B ShuttlP
+Samanta J ShuttlP
+
+statement ok
+DROP TABLE companies
+
+statement ok
+DROP TABLE leads