You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2023/06/28 18:47:21 UTC
[arrow-datafusion] branch bucketing updated: Bucketed hash join
This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch bucketing
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/bucketing by this push:
new cbfb01a875 Bucketed hash join
cbfb01a875 is described below
commit cbfb01a875bc80158912847b8f0b9008a7430a23
Author: Daniël Heres <da...@coralogix.com>
AuthorDate: Wed Jun 28 20:47:13 2023 +0200
Bucketed hash join
---
datafusion/core/src/physical_plan/joins/hash_join_utils.rs | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/datafusion/core/src/physical_plan/joins/hash_join_utils.rs b/datafusion/core/src/physical_plan/joins/hash_join_utils.rs
index b3ac1f392b..927b3667a5 100644
--- a/datafusion/core/src/physical_plan/joins/hash_join_utils.rs
+++ b/datafusion/core/src/physical_plan/joins/hash_join_utils.rs
@@ -90,7 +90,7 @@ use datafusion_common::Result;
// TODO: speed up collision checks
// https://github.com/apache/arrow-datafusion/issues/50
pub struct JoinHashMap {
- // Stores hash value to first index
+ // Stores first index in bucket
pub map: Vec<u64>,
// Stores indices in chained list data structure
pub next: Vec<u64>,
@@ -103,8 +103,8 @@ pub struct SymmetricJoinHashMap(pub RawTable<(u64, SmallVec<[u64; 1]>)>);
impl JoinHashMap {
pub(crate) fn with_capacity(capacity: usize) -> Self {
JoinHashMap {
- // Overallocate using 2 x the buckets
- map: vec![0; capacity * 2],
+ // Overallocate using 8 x the buckets
+ map: vec![0; capacity * 8],
next: vec![0; capacity],
}
}