You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2022/10/28 20:57:13 UTC
[arrow-datafusion] branch master updated: Don't add projection for semi joins in HashBuildProbeOrder (#4000)
This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 925a96225 Don't add projection for semi joins in HashBuildProbeOrder (#4000)
925a96225 is described below
commit 925a96225e2142d8adc53a3323ddf612fffe2007
Author: Daniël Heres <da...@gmail.com>
AuthorDate: Fri Oct 28 22:57:07 2022 +0200
Don't add projection for semi joins in HashBuildProbeOrder (#4000)
* WIP
* Add test case
---
.../physical_optimizer/hash_build_probe_order.rs | 49 ++++++++++++++++++++++
1 file changed, 49 insertions(+)
diff --git a/datafusion/core/src/physical_optimizer/hash_build_probe_order.rs b/datafusion/core/src/physical_optimizer/hash_build_probe_order.rs
index 014921046..8b9279e2c 100644
--- a/datafusion/core/src/physical_optimizer/hash_build_probe_order.rs
+++ b/datafusion/core/src/physical_optimizer/hash_build_probe_order.rs
@@ -174,6 +174,13 @@ impl PhysicalOptimizerRule for HashBuildProbeOrder {
*hash_join.partition_mode(),
hash_join.null_equals_null(),
)?;
+ if matches!(
+ hash_join.join_type(),
+ JoinType::LeftSemi | JoinType::RightSemi
+ ) {
+ return Ok(Arc::new(new_join));
+ }
+
let proj = ProjectionExec::try_new(
swap_reverting_projection(&left.schema(), &right.schema()),
Arc::new(new_join),
@@ -354,6 +361,48 @@ mod tests {
);
}
+ #[tokio::test]
+ async fn test_join_with_swap_left_semi() {
+ let (big, small) = create_big_and_small();
+
+ let join = HashJoinExec::try_new(
+ Arc::clone(&big),
+ Arc::clone(&small),
+ vec![(
+ Column::new_with_schema("big_col", &big.schema()).unwrap(),
+ Column::new_with_schema("small_col", &small.schema()).unwrap(),
+ )],
+ None,
+ &JoinType::LeftSemi,
+ PartitionMode::CollectLeft,
+ &false,
+ )
+ .unwrap();
+
+ let original_schema = join.schema();
+
+ let optimized_join = HashBuildProbeOrder::new()
+ .optimize(Arc::new(join), &SessionConfig::new())
+ .unwrap();
+
+ let swapped_join = optimized_join
+ .as_any()
+ .downcast_ref::<HashJoinExec>()
+ .expect(
+ "A proj is not required to swap columns back to their original order",
+ );
+
+ assert_eq!(swapped_join.schema().fields().len(), 1);
+
+ assert_eq!(swapped_join.left().statistics().total_byte_size, Some(10));
+ assert_eq!(
+ swapped_join.right().statistics().total_byte_size,
+ Some(100000)
+ );
+
+ assert_eq!(original_schema, swapped_join.schema());
+ }
+
/// Compare the input plan with the plan after running the probe order optimizer.
macro_rules! assert_optimized {
($EXPECTED_LINES: expr, $PLAN: expr) => {