You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2023/06/19 15:22:05 UTC

[arrow-datafusion] branch vectorized_collision updated: Initial PoC

This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch vectorized_collision
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/vectorized_collision by this push:
     new 207a75b5f8 Initial PoC
207a75b5f8 is described below

commit 207a75b5f8eb67973bd9fe7186bd5d4f8d67290e
Author: Daniël Heres <da...@coralogix.com>
AuthorDate: Mon Jun 19 17:21:58 2023 +0200

    Initial PoC
---
 .../core/src/physical_plan/joins/hash_join.rs      | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/datafusion/core/src/physical_plan/joins/hash_join.rs b/datafusion/core/src/physical_plan/joins/hash_join.rs
index 9d016a60f4..f973b3766b 100644
--- a/datafusion/core/src/physical_plan/joins/hash_join.rs
+++ b/datafusion/core/src/physical_plan/joins/hash_join.rs
@@ -724,6 +724,31 @@ pub fn build_equal_condition_join_indices(
     // Using a buffer builder to avoid slower normal builder
     let mut build_indices = UInt64BufferBuilder::new(0);
     let mut probe_indices = UInt32BufferBuilder::new(0);
+
+    let mut to_check: Vec<(u64, usize)> = hash_values
+        .iter()
+        .enumerate()
+        .flat_map(|(row, hash_value)| {
+            build_hashmap
+                .map
+                .get(*hash_value, |(hash, _)| *hash_value == *hash)
+                .map(|(_, v)| (*v - 1, row))
+        })
+        .collect();
+
+    while to_check.len() > 0 {
+        // Perform column-wise (vectorized) equality check
+
+        // check next items
+        to_check = to_check
+            .iter()
+            .flat_map(|(index, row)| {
+                let next = build_hashmap.next[*index as usize];
+                (next != 0).then(|| (next - 1, *row))
+            })
+            .collect();
+    }
+
     // Visit all of the probe rows
     for (row, hash_value) in hash_values.iter().enumerate() {
         // Get the hash and find it in the build index