You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2021/08/10 06:50:26 UTC
[arrow-datafusion] branch master updated: Fix right,
full join handling when having multiple non-matching rows at the
left side (#845)
This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 5cadc6a Fix right, full join handling when having multiple non-matching rows at the left side (#845)
5cadc6a is described below
commit 5cadc6a45a7a58db8c1c9b2c4834953233d365e3
Author: Daniƫl Heres <da...@gmail.com>
AuthorDate: Tue Aug 10 08:50:19 2021 +0200
Fix right, full join handling when having multiple non-matching rows at the left side (#845)
* Fix right, full join handling
* Remove comment
* Clippy
* Update datafusion/src/physical_plan/hash_join.rs
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
* Fmt
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
datafusion/src/physical_plan/hash_join.rs | 20 +++++++++-----------
datafusion/tests/sql.rs | 6 ------
2 files changed, 9 insertions(+), 17 deletions(-)
diff --git a/datafusion/src/physical_plan/hash_join.rs b/datafusion/src/physical_plan/hash_join.rs
index fa75437..9970824 100644
--- a/datafusion/src/physical_plan/hash_join.rs
+++ b/datafusion/src/physical_plan/hash_join.rs
@@ -737,6 +737,7 @@ fn build_join_indexes(
for (row, hash_value) in hash_values.iter().enumerate() {
match left.0.get(*hash_value, |(hash, _)| *hash_value == *hash) {
Some((_, indices)) => {
+ let mut no_match = true;
for &i in indices {
if equal_rows(
i as usize,
@@ -745,9 +746,14 @@ fn build_join_indexes(
&keys_values,
)? {
left_indices.append_value(i)?;
- } else {
- left_indices.append_null()?;
+ right_indices.append_value(row as u32)?;
+ no_match = false;
}
+ }
+ // If no rows matched left, still must keep the right
+ // with all nulls for left
+ if no_match {
+ left_indices.append_null()?;
right_indices.append_value(row as u32)?;
}
}
@@ -768,7 +774,7 @@ macro_rules! equal_rows_elem {
let left_array = $l.as_any().downcast_ref::<$array_type>().unwrap();
let right_array = $r.as_any().downcast_ref::<$array_type>().unwrap();
- match (left_array.is_null($left), left_array.is_null($right)) {
+ match (left_array.is_null($left), right_array.is_null($right)) {
(false, false) => left_array.value($left) == right_array.value($right),
_ => false,
}
@@ -1372,8 +1378,6 @@ mod tests {
}
#[tokio::test]
- // Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
- #[cfg(not(feature = "force_hash_collisions"))]
async fn join_full_multi_batch() {
let left = build_table(
("a1", &vec![1, 2, 3]),
@@ -1639,8 +1643,6 @@ mod tests {
}
#[tokio::test]
- // Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
- #[cfg(not(feature = "force_hash_collisions"))]
async fn join_right_one() -> Result<()> {
let left = build_table(
("a1", &vec![1, 2, 3]),
@@ -1677,8 +1679,6 @@ mod tests {
}
#[tokio::test]
- // Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
- #[cfg(not(feature = "force_hash_collisions"))]
async fn partitioned_join_right_one() -> Result<()> {
let left = build_table(
("a1", &vec![1, 2, 3]),
@@ -1716,8 +1716,6 @@ mod tests {
}
#[tokio::test]
- // Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
- #[cfg(not(feature = "force_hash_collisions"))]
async fn join_full_one() -> Result<()> {
let left = build_table(
("a1", &vec![1, 2, 3]),
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 046e4f2..0c33bd4 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -1797,8 +1797,6 @@ async fn equijoin_left_and_condition_from_right() -> Result<()> {
}
#[tokio::test]
-// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
-#[cfg(not(feature = "force_hash_collisions"))]
async fn equijoin_right_and_condition_from_left() -> Result<()> {
let mut ctx = create_join_context("t1_id", "t2_id")?;
let sql =
@@ -1852,8 +1850,6 @@ async fn left_join() -> Result<()> {
}
#[tokio::test]
-// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
-#[cfg(not(feature = "force_hash_collisions"))]
async fn right_join() -> Result<()> {
let mut ctx = create_join_context("t1_id", "t2_id")?;
let equivalent_sql = [
@@ -1874,8 +1870,6 @@ async fn right_join() -> Result<()> {
}
#[tokio::test]
-// Disable until https://github.com/apache/arrow-datafusion/issues/843 fixed
-#[cfg(not(feature = "force_hash_collisions"))]
async fn full_join() -> Result<()> {
let mut ctx = create_join_context("t1_id", "t2_id")?;
let equivalent_sql = [