You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by yj...@apache.org on 2022/04/16 10:32:49 UTC
[arrow-datafusion] branch master updated: Fix join without constraints (#2240)
This is an automated email from the ASF dual-hosted git repository.
yjshen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new aa609f6dc Fix join without constraints (#2240)
aa609f6dc is described below
commit aa609f6dc8f88ad508b3e8f08e4b078b487aa434
Author: Daniƫl Heres <da...@gmail.com>
AuthorDate: Sat Apr 16 12:32:44 2022 +0200
Fix join without constraints (#2240)
* Fix join without constraints
* Simplify using reduce
* Simplify
---
datafusion/core/src/physical_plan/hash_join.rs | 6 ++++++
datafusion/core/src/sql/planner.rs | 17 +++++++++--------
datafusion/core/tests/sql/joins.rs | 18 ++++++++++++++++++
3 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/datafusion/core/src/physical_plan/hash_join.rs b/datafusion/core/src/physical_plan/hash_join.rs
index 44b8cc97d..488222acc 100644
--- a/datafusion/core/src/physical_plan/hash_join.rs
+++ b/datafusion/core/src/physical_plan/hash_join.rs
@@ -189,6 +189,12 @@ impl HashJoinExec {
) -> Result<Self> {
let left_schema = left.schema();
let right_schema = right.schema();
+ if on.is_empty() {
+ return Err(DataFusionError::Plan(
+ "On constraints in HashJoinExec should be non-empty".to_string(),
+ ));
+ }
+
check_join_is_valid(&left_schema, &right_schema, &on)?;
let (schema, column_indices) =
diff --git a/datafusion/core/src/sql/planner.rs b/datafusion/core/src/sql/planner.rs
index 536682cf0..cff38d47b 100644
--- a/datafusion/core/src/sql/planner.rs
+++ b/datafusion/core/src/sql/planner.rs
@@ -524,7 +524,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
keys.into_iter().unzip();
// return the logical plan representing the join
- if filter.is_empty() {
+ if left_keys.is_empty() {
+ // When we don't have join keys, use cross join
+ let join = LogicalPlanBuilder::from(left).cross_join(&right)?;
+
+ join.filter(filter.into_iter().reduce(Expr::and).unwrap())?
+ .build()
+ } else if filter.is_empty() {
let join = LogicalPlanBuilder::from(left).join(
&right,
join_type,
@@ -537,13 +543,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
join_type,
(left_keys, right_keys),
)?;
- let join_filter_init = filter.remove(0);
- join.filter(
- filter
- .into_iter()
- .fold(join_filter_init, |acc, e| acc.and(e)),
- )?
- .build()
+ join.filter(filter.into_iter().reduce(Expr::and).unwrap())?
+ .build()
}
// Left join with all non-equijoin expressions from the right
// l left join r
diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs
index d8f5d4ad8..485904157 100644
--- a/datafusion/core/tests/sql/joins.rs
+++ b/datafusion/core/tests/sql/joins.rs
@@ -293,6 +293,24 @@ async fn left_join_not_null_filter_on_join_column() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn self_join_non_equijoin() -> Result<()> {
+ let ctx = create_join_context_with_nulls()?;
+ let sql =
+ "SELECT x.t1_id, y.t1_id FROM t1 x JOIN t1 y ON x.t1_id = 11 AND y.t1_id = 44";
+ let expected = vec![
+ "+-------+-------+",
+ "| t1_id | t1_id |",
+ "+-------+-------+",
+ "| 11 | 44 |",
+ "+-------+-------+",
+ ];
+
+ let actual = execute_to_batches(&ctx, sql).await;
+ assert_batches_eq!(expected, &actual);
+ Ok(())
+}
+
#[tokio::test]
async fn right_join_null_filter() -> Result<()> {
let ctx = create_join_context_with_nulls()?;