You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2023/06/19 11:37:58 UTC

[arrow-datafusion] branch adapt_datastructure updated: Update / simplify memory calculation with new datastructure

This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch adapt_datastructure
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/adapt_datastructure by this push:
     new 6d05fb4a93 Update / simplify memory calculation with new datastructure
6d05fb4a93 is described below

commit 6d05fb4a9381f6c7ba0058055ca4ea6ff01c5258
Author: Daniël Heres <da...@coralogix.com>
AuthorDate: Mon Jun 19 13:37:52 2023 +0200

    Update / simplify memory calculation with new datastructure
---
 datafusion/core/src/physical_plan/joins/hash_join.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/src/physical_plan/joins/hash_join.rs b/datafusion/core/src/physical_plan/joins/hash_join.rs
index 6491210231..fcfcc746e6 100644
--- a/datafusion/core/src/physical_plan/joins/hash_join.rs
+++ b/datafusion/core/src/physical_plan/joins/hash_join.rs
@@ -44,6 +44,7 @@ use arrow::{
 };
 use futures::{ready, Stream, StreamExt, TryStreamExt};
 use std::fmt;
+use std::mem::size_of;
 use std::sync::Arc;
 use std::task::Poll;
 use std::{any::Any, usize, vec};
@@ -508,10 +509,10 @@ async fn collect_left_input(
         )
     })? / 7)
         .next_power_of_two();
-    // 32 bytes per `(u64, SmallVec<[u64; 1]>)`
+    // 16 bytes per `(u64, u64)`
     // + 1 byte for each bucket
-    // + 16 bytes fixed
-    let estimated_hastable_size = 32 * estimated_buckets + estimated_buckets + 16;
+    // + fixed size of JoinHashMap (RawTable + Vec)
+    let estimated_hastable_size = 16 * estimated_buckets + estimated_buckets + size_of::<JoinHashMap>();
 
     reservation.try_grow(estimated_hastable_size)?;
     metrics.build_mem_used.add(estimated_hastable_size);