You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2023/06/28 18:47:21 UTC

[arrow-datafusion] branch bucketing updated: Bucketed hash join

This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch bucketing
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/bucketing by this push:
     new cbfb01a875 Bucketed hash join
cbfb01a875 is described below

commit cbfb01a875bc80158912847b8f0b9008a7430a23
Author: Daniël Heres <da...@coralogix.com>
AuthorDate: Wed Jun 28 20:47:13 2023 +0200

    Bucketed hash join
---
 datafusion/core/src/physical_plan/joins/hash_join_utils.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/src/physical_plan/joins/hash_join_utils.rs b/datafusion/core/src/physical_plan/joins/hash_join_utils.rs
index b3ac1f392b..927b3667a5 100644
--- a/datafusion/core/src/physical_plan/joins/hash_join_utils.rs
+++ b/datafusion/core/src/physical_plan/joins/hash_join_utils.rs
@@ -90,7 +90,7 @@ use datafusion_common::Result;
 // TODO: speed up collision checks
 // https://github.com/apache/arrow-datafusion/issues/50
 pub struct JoinHashMap {
-    // Stores hash value to first index
+    // Stores first index in bucket
     pub map: Vec<u64>,
     // Stores indices in chained list data structure
     pub next: Vec<u64>,
@@ -103,8 +103,8 @@ pub struct SymmetricJoinHashMap(pub RawTable<(u64, SmallVec<[u64; 1]>)>);
 impl JoinHashMap {
     pub(crate) fn with_capacity(capacity: usize) -> Self {
         JoinHashMap {
-            // Overallocate using 2 x the buckets
-            map: vec![0; capacity * 2],
+            // Overallocate using 8 x the buckets
+            map: vec![0; capacity * 8],
             next: vec![0; capacity],
         }
     }