You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2023/06/19 15:22:05 UTC
[arrow-datafusion] branch vectorized_collision updated: Initial PoC
This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch vectorized_collision
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/vectorized_collision by this push:
new 207a75b5f8 Initial PoC
207a75b5f8 is described below
commit 207a75b5f8eb67973bd9fe7186bd5d4f8d67290e
Author: Daniël Heres <da...@coralogix.com>
AuthorDate: Mon Jun 19 17:21:58 2023 +0200
Initial PoC
---
.../core/src/physical_plan/joins/hash_join.rs | 25 ++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/datafusion/core/src/physical_plan/joins/hash_join.rs b/datafusion/core/src/physical_plan/joins/hash_join.rs
index 9d016a60f4..f973b3766b 100644
--- a/datafusion/core/src/physical_plan/joins/hash_join.rs
+++ b/datafusion/core/src/physical_plan/joins/hash_join.rs
@@ -724,6 +724,31 @@ pub fn build_equal_condition_join_indices(
// Using a buffer builder to avoid slower normal builder
let mut build_indices = UInt64BufferBuilder::new(0);
let mut probe_indices = UInt32BufferBuilder::new(0);
+
+ let mut to_check: Vec<(u64, usize)> = hash_values
+ .iter()
+ .enumerate()
+ .flat_map(|(row, hash_value)| {
+ build_hashmap
+ .map
+ .get(*hash_value, |(hash, _)| *hash_value == *hash)
+ .map(|(_, v)| (*v - 1, row))
+ })
+ .collect();
+
+ while to_check.len() > 0 {
+ // Perform column-wise (vectorized) equality check
+
+ // check next items
+ to_check = to_check
+ .iter()
+ .flat_map(|(index, row)| {
+ let next = build_hashmap.next[*index as usize];
+ (next != 0).then(|| (next - 1, *row))
+ })
+ .collect();
+ }
+
// Visit all of the probe rows
for (row, hash_value) in hash_values.iter().enumerate() {
// Get the hash and find it in the build index