You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by yj...@apache.org on 2022/04/16 10:32:49 UTC

[arrow-datafusion] branch master updated: Fix join without constraints (#2240)

This is an automated email from the ASF dual-hosted git repository.

yjshen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new aa609f6dc Fix join without constraints (#2240)
aa609f6dc is described below

commit aa609f6dc8f88ad508b3e8f08e4b078b487aa434
Author: Daniƫl Heres <da...@gmail.com>
AuthorDate: Sat Apr 16 12:32:44 2022 +0200

    Fix join without constraints (#2240)
    
    * Fix join without constraints
    
    * Simplify using reduce
    
    * Simplify
---
 datafusion/core/src/physical_plan/hash_join.rs |  6 ++++++
 datafusion/core/src/sql/planner.rs             | 17 +++++++++--------
 datafusion/core/tests/sql/joins.rs             | 18 ++++++++++++++++++
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/datafusion/core/src/physical_plan/hash_join.rs b/datafusion/core/src/physical_plan/hash_join.rs
index 44b8cc97d..488222acc 100644
--- a/datafusion/core/src/physical_plan/hash_join.rs
+++ b/datafusion/core/src/physical_plan/hash_join.rs
@@ -189,6 +189,12 @@ impl HashJoinExec {
     ) -> Result<Self> {
         let left_schema = left.schema();
         let right_schema = right.schema();
+        if on.is_empty() {
+            return Err(DataFusionError::Plan(
+                "On constraints in HashJoinExec should be non-empty".to_string(),
+            ));
+        }
+
         check_join_is_valid(&left_schema, &right_schema, &on)?;
 
         let (schema, column_indices) =
diff --git a/datafusion/core/src/sql/planner.rs b/datafusion/core/src/sql/planner.rs
index 536682cf0..cff38d47b 100644
--- a/datafusion/core/src/sql/planner.rs
+++ b/datafusion/core/src/sql/planner.rs
@@ -524,7 +524,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                     keys.into_iter().unzip();
 
                 // return the logical plan representing the join
-                if filter.is_empty() {
+                if left_keys.is_empty() {
+                    // When we don't have join keys, use cross join
+                    let join = LogicalPlanBuilder::from(left).cross_join(&right)?;
+
+                    join.filter(filter.into_iter().reduce(Expr::and).unwrap())?
+                        .build()
+                } else if filter.is_empty() {
                     let join = LogicalPlanBuilder::from(left).join(
                         &right,
                         join_type,
@@ -537,13 +543,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                         join_type,
                         (left_keys, right_keys),
                     )?;
-                    let join_filter_init = filter.remove(0);
-                    join.filter(
-                        filter
-                            .into_iter()
-                            .fold(join_filter_init, |acc, e| acc.and(e)),
-                    )?
-                    .build()
+                    join.filter(filter.into_iter().reduce(Expr::and).unwrap())?
+                        .build()
                 }
                 // Left join with all non-equijoin expressions from the right
                 // l left join r
diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs
index d8f5d4ad8..485904157 100644
--- a/datafusion/core/tests/sql/joins.rs
+++ b/datafusion/core/tests/sql/joins.rs
@@ -293,6 +293,24 @@ async fn left_join_not_null_filter_on_join_column() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn self_join_non_equijoin() -> Result<()> {
+    let ctx = create_join_context_with_nulls()?;
+    let sql =
+        "SELECT x.t1_id, y.t1_id FROM t1 x JOIN t1 y ON x.t1_id = 11 AND y.t1_id = 44";
+    let expected = vec![
+        "+-------+-------+",
+        "| t1_id | t1_id |",
+        "+-------+-------+",
+        "| 11    | 44    |",
+        "+-------+-------+",
+    ];
+
+    let actual = execute_to_batches(&ctx, sql).await;
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
+
 #[tokio::test]
 async fn right_join_null_filter() -> Result<()> {
     let ctx = create_join_context_with_nulls()?;