You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/12/12 13:36:29 UTC

(arrow-datafusion) branch main updated: fix: incorrect set preserve_partitioning in SortExec (#8485)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 7f312c8a1d fix: incorrect set preserve_partitioning in SortExec (#8485)
7f312c8a1d is described below

commit 7f312c8a1d82b19f03c640e4a5d7d6437b2ee835
Author: Huaijin <ha...@gmail.com>
AuthorDate: Tue Dec 12 21:36:23 2023 +0800

    fix: incorrect set preserve_partitioning in SortExec (#8485)
    
    * fix: incorrect set preserve_partitioning in SortExec
    
    * add more comment
---
 .../src/physical_optimizer/projection_pushdown.rs  |  3 +-
 datafusion/sqllogictest/test_files/join.slt        | 37 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs
index 67a2eaf0d9..664afbe822 100644
--- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs
+++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs
@@ -454,7 +454,8 @@ fn try_swapping_with_sort(
 
     Ok(Some(Arc::new(
         SortExec::new(updated_exprs, make_with_child(projection, sort.input())?)
-            .with_fetch(sort.fetch()),
+            .with_fetch(sort.fetch())
+            .with_preserve_partitioning(sort.preserve_partitioning()),
     )))
 }
 
diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt
index 386ffe766b..c9dd7ca604 100644
--- a/datafusion/sqllogictest/test_files/join.slt
+++ b/datafusion/sqllogictest/test_files/join.slt
@@ -594,3 +594,40 @@ drop table IF EXISTS full_join_test;
 # batch size
 statement ok
 set datafusion.execution.batch_size = 8192;
+
+# related to: https://github.com/apache/arrow-datafusion/issues/8374
+statement ok
+CREATE TABLE t1(a text, b int) AS VALUES ('Alice', 50), ('Alice', 100);
+
+statement ok
+CREATE TABLE t2(a text, b int) AS VALUES ('Alice', 2), ('Alice', 1);
+
+# the current query results are incorrect, because the query was incorrectly rewritten as:
+# SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t1.b;
+# the difference is that the ORDER BY clause was rewritten from t2.b to t1.b, which is incorrect.
+# after https://github.com/apache/arrow-datafusion/issues/8374 is fixed, the correct result should be:
+# Alice 50
+# Alice 100
+# Alice 50
+# Alice 100
+query TI
+SELECT t1.a, t1.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t2.b;
+----
+Alice 50
+Alice 50
+Alice 100
+Alice 100
+
+query TITI
+SELECT t1.a, t1.b, t2.a, t2.b FROM t1 JOIN t2 ON t1.a = t2.a ORDER BY t1.a, t2.b;
+----
+Alice 50 Alice 1
+Alice 100 Alice 1
+Alice 50 Alice 2
+Alice 100 Alice 2
+
+statement ok
+DROP TABLE t1;
+
+statement ok
+DROP TABLE t2;