You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2022/10/28 20:57:13 UTC

[arrow-datafusion] branch master updated: Don't add projection for semi joins in HashBuildProbeOrder (#4000)

This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 925a96225 Don't add projection for semi joins in HashBuildProbeOrder (#4000)
925a96225 is described below

commit 925a96225e2142d8adc53a3323ddf612fffe2007
Author: Daniël Heres <da...@gmail.com>
AuthorDate: Fri Oct 28 22:57:07 2022 +0200

    Don't add projection for semi joins in HashBuildProbeOrder (#4000)
    
    * WIP
    
    * Add test case
---
 .../physical_optimizer/hash_build_probe_order.rs   | 49 ++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/datafusion/core/src/physical_optimizer/hash_build_probe_order.rs b/datafusion/core/src/physical_optimizer/hash_build_probe_order.rs
index 014921046..8b9279e2c 100644
--- a/datafusion/core/src/physical_optimizer/hash_build_probe_order.rs
+++ b/datafusion/core/src/physical_optimizer/hash_build_probe_order.rs
@@ -174,6 +174,13 @@ impl PhysicalOptimizerRule for HashBuildProbeOrder {
                     *hash_join.partition_mode(),
                     hash_join.null_equals_null(),
                 )?;
+                if matches!(
+                    hash_join.join_type(),
+                    JoinType::LeftSemi | JoinType::RightSemi
+                ) {
+                    return Ok(Arc::new(new_join));
+                }
+
                 let proj = ProjectionExec::try_new(
                     swap_reverting_projection(&left.schema(), &right.schema()),
                     Arc::new(new_join),
@@ -354,6 +361,48 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    async fn test_join_with_swap_left_semi() {
+        let (big, small) = create_big_and_small();
+
+        let join = HashJoinExec::try_new(
+            Arc::clone(&big),
+            Arc::clone(&small),
+            vec![(
+                Column::new_with_schema("big_col", &big.schema()).unwrap(),
+                Column::new_with_schema("small_col", &small.schema()).unwrap(),
+            )],
+            None,
+            &JoinType::LeftSemi,
+            PartitionMode::CollectLeft,
+            &false,
+        )
+        .unwrap();
+
+        let original_schema = join.schema();
+
+        let optimized_join = HashBuildProbeOrder::new()
+            .optimize(Arc::new(join), &SessionConfig::new())
+            .unwrap();
+
+        let swapped_join = optimized_join
+            .as_any()
+            .downcast_ref::<HashJoinExec>()
+            .expect(
+                "A proj is not required to swap columns back to their original order",
+            );
+
+        assert_eq!(swapped_join.schema().fields().len(), 1);
+
+        assert_eq!(swapped_join.left().statistics().total_byte_size, Some(10));
+        assert_eq!(
+            swapped_join.right().statistics().total_byte_size,
+            Some(100000)
+        );
+
+        assert_eq!(original_schema, swapped_join.schema());
+    }
+
     /// Compare the input plan with the plan after running the probe order optimizer.
     macro_rules! assert_optimized {
         ($EXPECTED_LINES: expr, $PLAN: expr) => {