You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by dh...@apache.org on 2023/06/19 11:37:58 UTC
[arrow-datafusion] branch adapt_datastructure updated: Update / simplify memory calculation with new datastructure
This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch adapt_datastructure
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/adapt_datastructure by this push:
new 6d05fb4a93 Update / simplify memory calculation with new datastructure
6d05fb4a93 is described below
commit 6d05fb4a9381f6c7ba0058055ca4ea6ff01c5258
Author: Daniël Heres <da...@coralogix.com>
AuthorDate: Mon Jun 19 13:37:52 2023 +0200
Update / simplify memory calculation with new datastructure
---
datafusion/core/src/physical_plan/joins/hash_join.rs | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/datafusion/core/src/physical_plan/joins/hash_join.rs b/datafusion/core/src/physical_plan/joins/hash_join.rs
index 6491210231..fcfcc746e6 100644
--- a/datafusion/core/src/physical_plan/joins/hash_join.rs
+++ b/datafusion/core/src/physical_plan/joins/hash_join.rs
@@ -44,6 +44,7 @@ use arrow::{
};
use futures::{ready, Stream, StreamExt, TryStreamExt};
use std::fmt;
+use std::mem::size_of;
use std::sync::Arc;
use std::task::Poll;
use std::{any::Any, usize, vec};
@@ -508,10 +509,10 @@ async fn collect_left_input(
)
})? / 7)
.next_power_of_two();
- // 32 bytes per `(u64, SmallVec<[u64; 1]>)`
+ // 16 bytes per `(u64, u64)`
// + 1 byte for each bucket
- // + 16 bytes fixed
- let estimated_hastable_size = 32 * estimated_buckets + estimated_buckets + 16;
+ // + fixed size of JoinHashMap (RawTable + Vec)
+ let estimated_hastable_size = 16 * estimated_buckets + estimated_buckets + size_of::<JoinHashMap>();
reservation.try_grow(estimated_hastable_size)?;
metrics.build_mem_used.add(estimated_hastable_size);